/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */

/* Defaults for the harness header and the test entry-point name.  Each is
   only defined here if it has not already been defined before this point,
   so an earlier definition takes precedence.  */
#ifndef CHECK_H
#define CHECK_H "sse4_1-check.h"
#endif

#ifndef TEST
#define TEST sse4_1_test
#endif

#include CHECK_H

#include <smmintrin.h>
#include <string.h>

/* Immediate operands for _mm_insert_ps.  As decoded by the reference
   computation in TEST: bits 7:6 pick the element read from the second
   operand, bits 5:4 pick the element written in the result, and each set
   bit among bits 3:0 zeroes the result element with that index.  */
#define msk0 0x41  /* source 1 -> destination 0, zero mask 0x1 */
#define msk1 0x90  /* source 2 -> destination 1, zero mask 0x0 */
#define msk2 0xe9  /* source 3 -> destination 2, zero mask 0x9 */
#define msk3 0x70  /* source 1 -> destination 3, zero mask 0x0 */

#define msk4 0xFC  /* source 3 -> destination 3, zero mask 0xC */
#define msk5 0x05  /* source 0 -> destination 0, zero mask 0x5 */
#define msk6 0x0A  /* source 0 -> destination 0, zero mask 0xA */
#define msk7 0x0F  /* source 0 -> destination 0, zero mask 0xF */

/* Second operand passed to _mm_insert_ps in TEST.  The union overlays the
   vector with four floats so individual elements can be written and read
   by index.  */
union
  {
    __m128 x;
    float f[4];
  } val1;

static void
TEST (void)
{
  union
    {
      __m128 x;
      float f[4];
    } res[8], val2, tmp;
  int masks[8];
  int i, j;

  val2.f[0] = 55.0;
  val2.f[1] = 55.0;
  val2.f[2] = 55.0;
  val2.f[3] = 55.0;

  val1.f[0] = 1.;
  val1.f[1] = 2.;
  val1.f[2] = 3.;
  val1.f[3] = 4.;

  asm volatile ("" : "+m" (val1));
  res[0].x = _mm_insert_ps (val2.x, val1.x, msk0);
  asm volatile ("" : "+m" (val1));
  res[1].x = _mm_insert_ps (val2.x, val1.x, msk1);
  asm volatile ("" : "+m" (val1));
  res[2].x = _mm_insert_ps (val2.x, val1.x, msk2);
  asm volatile ("" : "+m" (val1));
  res[3].x = _mm_insert_ps (val2.x, val1.x, msk3);

  masks[0] = msk0;
  masks[1] = msk1;
  masks[2] = msk2;
  masks[3] = msk3;

  for (i = 0; i < 4; i++)
    {
      asm volatile ("" : "+m" (val1));
      res[i + 4].x = _mm_insert_ps (val2.x, val1.x, msk4);
    }

  masks[4] = msk4;
  masks[5] = msk4;
  masks[6] = msk4;
  masks[7] = msk4;

  for (i=0; i < 8; i++)
    {
      tmp = val2;
      tmp.f[(masks[i] & 0x30) >> 4] = val1.f[(masks[i] & 0xC0) >> 6];

      for (j = 0; j < 4; j++)
	if (masks[i] & (0x1 << j))
	  tmp.f[j] = 0.f;

      if (memcmp (&res[i], &tmp, sizeof (tmp)))
	abort ();
    }
}