#include <machine/asm.h>

.globl	ossl_vaes_vpclmulqdq_capable
.type	ossl_vaes_vpclmulqdq_capable,@function
.align	32
ossl_vaes_vpclmulqdq_capable:
	movq	OPENSSL_ia32cap_P+8(%rip),%rcx

	movq	$6600291188736,%rdx
	xorl	%eax,%eax
	andq	%rdx,%rcx
	cmpq	%rdx,%rcx
	cmoveq	%rcx,%rax
	.byte	0xf3,0xc3
.size	ossl_vaes_vpclmulqdq_capable, .-ossl_vaes_vpclmulqdq_capable
.text	
.globl	ossl_aes_gcm_init_avx512
.type	ossl_aes_gcm_init_avx512,@function
.align	32
ossl_aes_gcm_init_avx512:
.cfi_startproc	
.byte	243,15,30,250
	vpxorq	%xmm16,%xmm16,%xmm16


	movl	240(%rdi),%eax
	cmpl	$9,%eax
	je	.Laes_128_0
	cmpl	$11,%eax
	je	.Laes_192_0
	cmpl	$13,%eax
	je	.Laes_256_0
	jmp	.Lexit_aes_0
.align	32
.Laes_128_0:
	vpxorq	0(%rdi),%xmm16,%xmm16

	vaesenc	16(%rdi),%xmm16,%xmm16

	vaesenc	32(%rdi),%xmm16,%xmm16

	vaesenc	48(%rdi),%xmm16,%xmm16

	vaesenc	64(%rdi),%xmm16,%xmm16

	vaesenc	80(%rdi),%xmm16,%xmm16

	vaesenc	96(%rdi),%xmm16,%xmm16

	vaesenc	112(%rdi),%xmm16,%xmm16

	vaesenc	128(%rdi),%xmm16,%xmm16

	vaesenc	144(%rdi),%xmm16,%xmm16

	vaesenclast	160(%rdi),%xmm16,%xmm16
	jmp	.Lexit_aes_0
.align	32
.Laes_192_0:
	vpxorq	0(%rdi),%xmm16,%xmm16

	vaesenc	16(%rdi),%xmm16,%xmm16

	vaesenc	32(%rdi),%xmm16,%xmm16

	vaesenc	48(%rdi),%xmm16,%xmm16

	vaesenc	64(%rdi),%xmm16,%xmm16

	vaesenc	80(%rdi),%xmm16,%xmm16

	vaesenc	96(%rdi),%xmm16,%xmm16

	vaesenc	112(%rdi),%xmm16,%xmm16

	vaesenc	128(%rdi),%xmm16,%xmm16

	vaesenc	144(%rdi),%xmm16,%xmm16

	vaesenc	160(%rdi),%xmm16,%xmm16

	vaesenc	176(%rdi),%xmm16,%xmm16

	vaesenclast	192(%rdi),%xmm16,%xmm16
	jmp	.Lexit_aes_0
.align	32
.Laes_256_0:
	vpxorq	0(%rdi),%xmm16,%xmm16

	vaesenc	16(%rdi),%xmm16,%xmm16

	vaesenc	32(%rdi),%xmm16,%xmm16

	vaesenc	48(%rdi),%xmm16,%xmm16

	vaesenc	64(%rdi),%xmm16,%xmm16

	vaesenc	80(%rdi),%xmm16,%xmm16

	vaesenc	96(%rdi),%xmm16,%xmm16

	vaesenc	112(%rdi),%xmm16,%xmm16

	vaesenc	128(%rdi),%xmm16,%xmm16

	vaesenc	144(%rdi),%xmm16,%xmm16

	vaesenc	160(%rdi),%xmm16,%xmm16

	vaesenc	176(%rdi),%xmm16,%xmm16

	vaesenc	192(%rdi),%xmm16,%xmm16

	vaesenc	208(%rdi),%xmm16,%xmm16

	vaesenclast	224(%rdi),%xmm16,%xmm16
	jmp	.Lexit_aes_0
.Lexit_aes_0:

	vpshufb	SHUF_MASK(%rip),%xmm16,%xmm16

	vmovdqa64	%xmm16,%xmm2
	vpsllq	$1,%xmm16,%xmm16
	vpsrlq	$63,%xmm2,%xmm2
	vmovdqa	%xmm2,%xmm1
	vpslldq	$8,%xmm2,%xmm2
	vpsrldq	$8,%xmm1,%xmm1
	vporq	%xmm2,%xmm16,%xmm16

	vpshufd	$36,%xmm1,%xmm2
	vpcmpeqd	TWOONE(%rip),%xmm2,%xmm2
	vpand	POLY(%rip),%xmm2,%xmm2
	vpxorq	%xmm2,%xmm16,%xmm16

	vmovdqu64	%xmm16,336(%rsi)
	vshufi32x4	$0x00,%ymm16,%ymm16,%ymm4
	vmovdqa	%ymm4,%ymm3

	vpclmulqdq	$0x11,%ymm4,%ymm3,%ymm0
	vpclmulqdq	$0x00,%ymm4,%ymm3,%ymm1
	vpclmulqdq	$0x01,%ymm4,%ymm3,%ymm2
	vpclmulqdq	$0x10,%ymm4,%ymm3,%ymm3
	vpxorq	%ymm2,%ymm3,%ymm3

	vpsrldq	$8,%ymm3,%ymm2
	vpslldq	$8,%ymm3,%ymm3
	vpxorq	%ymm2,%ymm0,%ymm0
	vpxorq	%ymm1,%ymm3,%ymm3



	vmovdqu64	POLY2(%rip),%ymm2

	vpclmulqdq	$0x01,%ymm3,%ymm2,%ymm1
	vpslldq	$8,%ymm1,%ymm1
	vpxorq	%ymm1,%ymm3,%ymm3



	vpclmulqdq	$0x00,%ymm3,%ymm2,%ymm1
	vpsrldq	$4,%ymm1,%ymm1
	vpclmulqdq	$0x10,%ymm3,%ymm2,%ymm3
	vpslldq	$4,%ymm3,%ymm3

	vpternlogq	$0x96,%ymm1,%ymm0,%ymm3

	vmovdqu64	%xmm3,320(%rsi)
	vinserti64x2	$1,%xmm16,%ymm3,%ymm4
	vmovdqa64	%ymm4,%ymm5

	vpclmulqdq	$0x11,%ymm3,%ymm4,%ymm0
	vpclmulqdq	$0x00,%ymm3,%ymm4,%ymm1
	vpclmulqdq	$0x01,%ymm3,%ymm4,%ymm2
	vpclmulqdq	$0x10,%ymm3,%ymm4,%ymm4
	vpxorq	%ymm2,%ymm4,%ymm4

	vpsrldq	$8,%ymm4,%ymm2
	vpslldq	$8,%ymm4,%ymm4
	vpxorq	%ymm2,%ymm0,%ymm0
	vpxorq	%ymm1,%ymm4,%ymm4



	vmovdqu64	POLY2(%rip),%ymm2

	vpclmulqdq	$0x01,%ymm4,%ymm2,%ymm1
	vpslldq	$8,%ymm1,%ymm1
	vpxorq	%ymm1,%ymm4,%ymm4



	vpclmulqdq	$0x00,%ymm4,%ymm2,%ymm1
	vpsrldq	$4,%ymm1,%ymm1
	vpclmulqdq	$0x10,%ymm4,%ymm2,%ymm4
	vpslldq	$4,%ymm4,%ymm4

	vpternlogq	$0x96,%ymm1,%ymm0,%ymm4

	vmovdqu64	%ymm4,288(%rsi)

	vinserti64x4	$1,%ymm5,%zmm4,%zmm4


	vshufi64x2	$0x00,%zmm4,%zmm4,%zmm3
	vmovdqa64	%zmm4,%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm0
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm1
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm2
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm2
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm0,%zmm0
	vpxorq	%zmm1,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm2

	vpclmulqdq	$0x01,%zmm4,%zmm2,%zmm1
	vpslldq	$8,%zmm1,%zmm1
	vpxorq	%zmm1,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm2,%zmm1
	vpsrldq	$4,%zmm1,%zmm1
	vpclmulqdq	$0x10,%zmm4,%zmm2,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm1,%zmm0,%zmm4

	vmovdqu64	%zmm4,224(%rsi)
	vshufi64x2	$0x00,%zmm4,%zmm4,%zmm3

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm0
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm1
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm2
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm2
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm0,%zmm0
	vpxorq	%zmm1,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm2

	vpclmulqdq	$0x01,%zmm5,%zmm2,%zmm1
	vpslldq	$8,%zmm1,%zmm1
	vpxorq	%zmm1,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm2,%zmm1
	vpsrldq	$4,%zmm1,%zmm1
	vpclmulqdq	$0x10,%zmm5,%zmm2,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm1,%zmm0,%zmm5

	vmovdqu64	%zmm5,160(%rsi)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm0
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm1
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm2
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm2
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm0,%zmm0
	vpxorq	%zmm1,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm2

	vpclmulqdq	$0x01,%zmm4,%zmm2,%zmm1
	vpslldq	$8,%zmm1,%zmm1
	vpxorq	%zmm1,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm2,%zmm1
	vpsrldq	$4,%zmm1,%zmm1
	vpclmulqdq	$0x10,%zmm4,%zmm2,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm1,%zmm0,%zmm4

	vmovdqu64	%zmm4,96(%rsi)
	vzeroupper
.Labort_init:
	.byte	0xf3,0xc3
.cfi_endproc	
.size	ossl_aes_gcm_init_avx512, .-ossl_aes_gcm_init_avx512
.globl	ossl_aes_gcm_setiv_avx512
.type	ossl_aes_gcm_setiv_avx512,@function
.align	32
ossl_aes_gcm_setiv_avx512:
.cfi_startproc	
.Lsetiv_seh_begin:
.byte	243,15,30,250
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
.Lsetiv_seh_push_rbx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
.Lsetiv_seh_push_rbp:
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.Lsetiv_seh_push_r12:
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.Lsetiv_seh_push_r13:
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.Lsetiv_seh_push_r14:
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lsetiv_seh_push_r15:










	leaq	0(%rsp),%rbp
.cfi_def_cfa_register	%rbp
.Lsetiv_seh_setfp:

.Lsetiv_seh_prolog_end:
	subq	$820,%rsp
	andq	$(-64),%rsp
	cmpq	$12,%rcx
	je	iv_len_12_init_IV
	vpxor	%xmm2,%xmm2,%xmm2
	movq	%rdx,%r10
	movq	%rcx,%r11
	orq	%r11,%r11
	jz	.L_CALC_AAD_done_1

	xorq	%rbx,%rbx
	vmovdqa64	SHUF_MASK(%rip),%zmm16

.L_get_AAD_loop48x16_1:
	cmpq	$768,%r11
	jl	.L_exit_AAD_loop48x16_1
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_2

	vmovdqu64	288(%rsi),%zmm1
	vmovdqu64	%zmm1,704(%rsp)

	vmovdqu64	224(%rsi),%zmm9
	vmovdqu64	%zmm9,640(%rsp)


	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9

	vmovdqu64	160(%rsi),%zmm10
	vmovdqu64	%zmm10,576(%rsp)

	vmovdqu64	96(%rsi),%zmm12
	vmovdqu64	%zmm12,512(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,448(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,384(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,320(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,256(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,192(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,128(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,64(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,0(%rsp)
.L_skip_hkeys_precomputation_2:
	movq	$1,%rbx
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	0(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	64(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	128(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	192(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	256(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	320(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	448(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	512(%r10),%zmm11
	vmovdqu64	576(%r10),%zmm3
	vmovdqu64	640(%r10),%zmm4
	vmovdqu64	704(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	576(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	704(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm2

	subq	$768,%r11
	je	.L_CALC_AAD_done_1

	addq	$768,%r10
	jmp	.L_get_AAD_loop48x16_1

.L_exit_AAD_loop48x16_1:

	cmpq	$512,%r11
	jl	.L_less_than_32x16_1

	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_3

	vmovdqu64	288(%rsi),%zmm1
	vmovdqu64	%zmm1,704(%rsp)

	vmovdqu64	224(%rsi),%zmm9
	vmovdqu64	%zmm9,640(%rsp)


	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9

	vmovdqu64	160(%rsi),%zmm10
	vmovdqu64	%zmm10,576(%rsp)

	vmovdqu64	96(%rsi),%zmm12
	vmovdqu64	%zmm12,512(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,448(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,384(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,320(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,256(%rsp)
.L_skip_hkeys_precomputation_3:
	movq	$1,%rbx
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	256(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	320(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	448(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	576(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	704(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm2

	subq	$512,%r11
	je	.L_CALC_AAD_done_1

	addq	$512,%r10
	jmp	.L_less_than_16x16_1

.L_less_than_32x16_1:
	cmpq	$256,%r11
	jl	.L_less_than_16x16_1

	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	96(%rsi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	160(%rsi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	224(%rsi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	288(%rsi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm2

	subq	$256,%r11
	je	.L_CALC_AAD_done_1

	addq	$256,%r10

.L_less_than_16x16_1:

	leaq	byte64_len_to_mask_table(%rip),%r12
	leaq	(%r12,%r11,8),%r12


	addl	$15,%r11d
	shrl	$4,%r11d
	cmpl	$2,%r11d
	jb	.L_AAD_blocks_1_1
	je	.L_AAD_blocks_2_1
	cmpl	$4,%r11d
	jb	.L_AAD_blocks_3_1
	je	.L_AAD_blocks_4_1
	cmpl	$6,%r11d
	jb	.L_AAD_blocks_5_1
	je	.L_AAD_blocks_6_1
	cmpl	$8,%r11d
	jb	.L_AAD_blocks_7_1
	je	.L_AAD_blocks_8_1
	cmpl	$10,%r11d
	jb	.L_AAD_blocks_9_1
	je	.L_AAD_blocks_10_1
	cmpl	$12,%r11d
	jb	.L_AAD_blocks_11_1
	je	.L_AAD_blocks_12_1
	cmpl	$14,%r11d
	jb	.L_AAD_blocks_13_1
	je	.L_AAD_blocks_14_1
	cmpl	$15,%r11d
	je	.L_AAD_blocks_15_1
.L_AAD_blocks_16_1:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%zmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	96(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	160(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	224(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm9,%zmm11,%zmm1
	vpternlogq	$0x96,%zmm10,%zmm3,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm12,%zmm11,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm3,%zmm8
	vmovdqu64	288(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm5,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm5,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm5,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm5,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13

	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_15_1:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%zmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	112(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	176(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	240(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	304(%rsi),%ymm15
	vinserti64x2	$2,336(%rsi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm5,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm5,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm5,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm5,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_14_1:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%ymm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%ymm16,%ymm5,%ymm5
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	128(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	192(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	256(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	320(%rsi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm5,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm5,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm5,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm5,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_13_1:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%xmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%xmm16,%xmm5,%xmm5
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	144(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	208(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	272(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	336(%rsi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm5,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm5,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm5,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm5,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_12_1:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	160(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	224(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	288(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13

	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_11_1:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	176(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	240(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vmovdqu64	304(%rsi),%ymm15
	vinserti64x2	$2,336(%rsi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_10_1:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%ymm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%ymm16,%ymm4,%ymm4
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	192(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	256(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vmovdqu64	320(%rsi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm4,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm4,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm4,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm4,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_9_1:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%xmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%xmm16,%xmm4,%xmm4
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	208(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	272(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vmovdqu64	336(%rsi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm4,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm4,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm4,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm4,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_8_1:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	224(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	288(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13

	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_7_1:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	240(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	vmovdqu64	304(%rsi),%ymm15
	vinserti64x2	$2,336(%rsi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_6_1:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%ymm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%ymm16,%ymm3,%ymm3
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	256(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	vmovdqu64	320(%rsi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm3,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm3,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm3,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm3,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_5_1:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%xmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%xmm16,%xmm3,%xmm3
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	272(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	vmovdqu64	336(%rsi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm3,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm3,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm3,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm3,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_4_1:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	288(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13

	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_3_1:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	304(%rsi),%ymm15
	vinserti64x2	$2,336(%rsi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_2_1:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%ymm11{%k1}{z}
	vpshufb	%ymm16,%ymm11,%ymm11
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	320(%rsi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm11,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm11,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm11,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm11,%ymm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_1_1:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%xmm11{%k1}{z}
	vpshufb	%xmm16,%xmm11,%xmm11
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	336(%rsi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm11,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm11,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm11,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm11,%xmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

.L_CALC_AAD_done_1:
	movq	%rcx,%r10
	shlq	$3,%r10
	vmovq	%r10,%xmm3


	vpxorq	%xmm2,%xmm3,%xmm2

	vmovdqu64	336(%rsi),%xmm1

	vpclmulqdq	$0x11,%xmm1,%xmm2,%xmm11
	vpclmulqdq	$0x00,%xmm1,%xmm2,%xmm3
	vpclmulqdq	$0x01,%xmm1,%xmm2,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm2,%xmm2
	vpxorq	%xmm4,%xmm2,%xmm2

	vpsrldq	$8,%xmm2,%xmm4
	vpslldq	$8,%xmm2,%xmm2
	vpxorq	%xmm4,%xmm11,%xmm11
	vpxorq	%xmm3,%xmm2,%xmm2



	vmovdqu64	POLY2(%rip),%xmm4

	vpclmulqdq	$0x01,%xmm2,%xmm4,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm2,%xmm2



	vpclmulqdq	$0x00,%xmm2,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm2,%xmm4,%xmm2
	vpslldq	$4,%xmm2,%xmm2

	vpternlogq	$0x96,%xmm3,%xmm11,%xmm2

	vpshufb	SHUF_MASK(%rip),%xmm2,%xmm2
	jmp	skip_iv_len_12_init_IV
iv_len_12_init_IV:

	vmovdqu8	ONEf(%rip),%xmm2
	movq	%rdx,%r11
	movl	$0x0000000000000fff,%r10d
	kmovq	%r10,%k1
	vmovdqu8	(%r11),%xmm2{%k1}
skip_iv_len_12_init_IV:
	vmovdqu	%xmm2,%xmm1


	movl	240(%rdi),%r10d
	cmpl	$9,%r10d
	je	.Laes_128_4
	cmpl	$11,%r10d
	je	.Laes_192_4
	cmpl	$13,%r10d
	je	.Laes_256_4
	jmp	.Lexit_aes_4
.align	32
.Laes_128_4:
	vpxorq	0(%rdi),%xmm1,%xmm1

	vaesenc	16(%rdi),%xmm1,%xmm1

	vaesenc	32(%rdi),%xmm1,%xmm1

	vaesenc	48(%rdi),%xmm1,%xmm1

	vaesenc	64(%rdi),%xmm1,%xmm1

	vaesenc	80(%rdi),%xmm1,%xmm1

	vaesenc	96(%rdi),%xmm1,%xmm1

	vaesenc	112(%rdi),%xmm1,%xmm1

	vaesenc	128(%rdi),%xmm1,%xmm1

	vaesenc	144(%rdi),%xmm1,%xmm1

	vaesenclast	160(%rdi),%xmm1,%xmm1
	jmp	.Lexit_aes_4
.align	32
.Laes_192_4:
	vpxorq	0(%rdi),%xmm1,%xmm1

	vaesenc	16(%rdi),%xmm1,%xmm1

	vaesenc	32(%rdi),%xmm1,%xmm1

	vaesenc	48(%rdi),%xmm1,%xmm1

	vaesenc	64(%rdi),%xmm1,%xmm1

	vaesenc	80(%rdi),%xmm1,%xmm1

	vaesenc	96(%rdi),%xmm1,%xmm1

	vaesenc	112(%rdi),%xmm1,%xmm1

	vaesenc	128(%rdi),%xmm1,%xmm1

	vaesenc	144(%rdi),%xmm1,%xmm1

	vaesenc	160(%rdi),%xmm1,%xmm1

	vaesenc	176(%rdi),%xmm1,%xmm1

	vaesenclast	192(%rdi),%xmm1,%xmm1
	jmp	.Lexit_aes_4
.align	32
.Laes_256_4:
	vpxorq	0(%rdi),%xmm1,%xmm1

	vaesenc	16(%rdi),%xmm1,%xmm1

	vaesenc	32(%rdi),%xmm1,%xmm1

	vaesenc	48(%rdi),%xmm1,%xmm1

	vaesenc	64(%rdi),%xmm1,%xmm1

	vaesenc	80(%rdi),%xmm1,%xmm1

	vaesenc	96(%rdi),%xmm1,%xmm1

	vaesenc	112(%rdi),%xmm1,%xmm1

	vaesenc	128(%rdi),%xmm1,%xmm1

	vaesenc	144(%rdi),%xmm1,%xmm1

	vaesenc	160(%rdi),%xmm1,%xmm1

	vaesenc	176(%rdi),%xmm1,%xmm1

	vaesenc	192(%rdi),%xmm1,%xmm1

	vaesenc	208(%rdi),%xmm1,%xmm1

	vaesenclast	224(%rdi),%xmm1,%xmm1
	jmp	.Lexit_aes_4
.Lexit_aes_4:

	vmovdqu	%xmm1,32(%rsi)


	vpshufb	SHUF_MASK(%rip),%xmm2,%xmm2
	vmovdqu	%xmm2,0(%rsi)
	cmpq	$256,%rcx
	jbe	.Lskip_hkeys_cleanup_5
	vpxor	%xmm0,%xmm0,%xmm0
	vmovdqa64	%zmm0,0(%rsp)
	vmovdqa64	%zmm0,64(%rsp)
	vmovdqa64	%zmm0,128(%rsp)
	vmovdqa64	%zmm0,192(%rsp)
	vmovdqa64	%zmm0,256(%rsp)
	vmovdqa64	%zmm0,320(%rsp)
	vmovdqa64	%zmm0,384(%rsp)
	vmovdqa64	%zmm0,448(%rsp)
	vmovdqa64	%zmm0,512(%rsp)
	vmovdqa64	%zmm0,576(%rsp)
	vmovdqa64	%zmm0,640(%rsp)
	vmovdqa64	%zmm0,704(%rsp)
.Lskip_hkeys_cleanup_5:
	vzeroupper
	leaq	(%rbp),%rsp
.cfi_def_cfa_register	%rsp
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
.Labort_setiv:
	.byte	0xf3,0xc3
.Lsetiv_seh_end:
.cfi_endproc	
.size	ossl_aes_gcm_setiv_avx512, .-ossl_aes_gcm_setiv_avx512
.globl	ossl_aes_gcm_update_aad_avx512
.type	ossl_aes_gcm_update_aad_avx512,@function
.align	32
ossl_aes_gcm_update_aad_avx512:
.cfi_startproc	
.Lghash_seh_begin:
.byte	243,15,30,250
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
.Lghash_seh_push_rbx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
.Lghash_seh_push_rbp:
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.Lghash_seh_push_r12:
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.Lghash_seh_push_r13:
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.Lghash_seh_push_r14:
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lghash_seh_push_r15:










	leaq	0(%rsp),%rbp
.cfi_def_cfa_register	%rbp
.Lghash_seh_setfp:

.Lghash_seh_prolog_end:
	subq	$820,%rsp
	andq	$(-64),%rsp
	vmovdqu64	64(%rdi),%xmm14
	movq	%rsi,%r10
	movq	%rdx,%r11
	orq	%r11,%r11
	jz	.L_CALC_AAD_done_6

	xorq	%rbx,%rbx
	vmovdqa64	SHUF_MASK(%rip),%zmm16

.L_get_AAD_loop48x16_6:
	cmpq	$768,%r11
	jl	.L_exit_AAD_loop48x16_6
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_7

	vmovdqu64	288(%rdi),%zmm1
	vmovdqu64	%zmm1,704(%rsp)

	vmovdqu64	224(%rdi),%zmm9
	vmovdqu64	%zmm9,640(%rsp)


	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9

	vmovdqu64	160(%rdi),%zmm10
	vmovdqu64	%zmm10,576(%rsp)

	vmovdqu64	96(%rdi),%zmm12
	vmovdqu64	%zmm12,512(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,448(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,384(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,320(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,256(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,192(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,128(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,64(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,0(%rsp)
.L_skip_hkeys_precomputation_7:
	movq	$1,%rbx
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	0(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	64(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	128(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	192(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	256(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	320(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	448(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	512(%r10),%zmm11
	vmovdqu64	576(%r10),%zmm3
	vmovdqu64	640(%r10),%zmm4
	vmovdqu64	704(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	576(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	704(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm14

	subq	$768,%r11
	je	.L_CALC_AAD_done_6

	addq	$768,%r10
	jmp	.L_get_AAD_loop48x16_6

.L_exit_AAD_loop48x16_6:

	cmpq	$512,%r11
	jl	.L_less_than_32x16_6

	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_8

	vmovdqu64	288(%rdi),%zmm1
	vmovdqu64	%zmm1,704(%rsp)

	vmovdqu64	224(%rdi),%zmm9
	vmovdqu64	%zmm9,640(%rsp)


	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9

	vmovdqu64	160(%rdi),%zmm10
	vmovdqu64	%zmm10,576(%rsp)

	vmovdqu64	96(%rdi),%zmm12
	vmovdqu64	%zmm12,512(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,448(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,384(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,320(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,256(%rsp)
.L_skip_hkeys_precomputation_8:
	movq	$1,%rbx
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	256(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	320(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	448(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	576(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	704(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm14

	subq	$512,%r11
	je	.L_CALC_AAD_done_6

	addq	$512,%r10
	jmp	.L_less_than_16x16_6

.L_less_than_32x16_6:
	cmpq	$256,%r11
	jl	.L_less_than_16x16_6

	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	96(%rdi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	160(%rdi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	224(%rdi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	288(%rdi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm14

	subq	$256,%r11
	je	.L_CALC_AAD_done_6

	addq	$256,%r10

.L_less_than_16x16_6:

	leaq	byte64_len_to_mask_table(%rip),%r12
	leaq	(%r12,%r11,8),%r12


	addl	$15,%r11d
	shrl	$4,%r11d
	cmpl	$2,%r11d
	jb	.L_AAD_blocks_1_6
	je	.L_AAD_blocks_2_6
	cmpl	$4,%r11d
	jb	.L_AAD_blocks_3_6
	je	.L_AAD_blocks_4_6
	cmpl	$6,%r11d
	jb	.L_AAD_blocks_5_6
	je	.L_AAD_blocks_6_6
	cmpl	$8,%r11d
	jb	.L_AAD_blocks_7_6
	je	.L_AAD_blocks_8_6
	cmpl	$10,%r11d
	jb	.L_AAD_blocks_9_6
	je	.L_AAD_blocks_10_6
	cmpl	$12,%r11d
	jb	.L_AAD_blocks_11_6
	je	.L_AAD_blocks_12_6
	cmpl	$14,%r11d
	jb	.L_AAD_blocks_13_6
	je	.L_AAD_blocks_14_6
	cmpl	$15,%r11d
	je	.L_AAD_blocks_15_6
.L_AAD_blocks_16_6:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%zmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	96(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	160(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	224(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm9,%zmm11,%zmm1
	vpternlogq	$0x96,%zmm10,%zmm3,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm12,%zmm11,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm3,%zmm8
	vmovdqu64	288(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm5,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm5,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm5,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm5,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13

	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_15_6:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%zmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	112(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	176(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	240(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	304(%rdi),%ymm15
	vinserti64x2	$2,336(%rdi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm5,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm5,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm5,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm5,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_14_6:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%ymm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%ymm16,%ymm5,%ymm5
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	128(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	192(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	256(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	320(%rdi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm5,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm5,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm5,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm5,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_13_6:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%xmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%xmm16,%xmm5,%xmm5
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	144(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	208(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	272(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	336(%rdi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm5,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm5,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm5,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm5,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_12_6:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	160(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	224(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	288(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13

	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_11_6:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	176(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	240(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vmovdqu64	304(%rdi),%ymm15
	vinserti64x2	$2,336(%rdi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_10_6:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%ymm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%ymm16,%ymm4,%ymm4
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	192(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	256(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vmovdqu64	320(%rdi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm4,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm4,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm4,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm4,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_9_6:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%xmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%xmm16,%xmm4,%xmm4
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	208(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	272(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vmovdqu64	336(%rdi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm4,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm4,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm4,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm4,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_8_6:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	224(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	288(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13

	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_7_6:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	240(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	vmovdqu64	304(%rdi),%ymm15
	vinserti64x2	$2,336(%rdi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_6_6:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%ymm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%ymm16,%ymm3,%ymm3
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	256(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	vmovdqu64	320(%rdi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm3,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm3,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm3,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm3,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_5_6:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%xmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%xmm16,%xmm3,%xmm3
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	272(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	vmovdqu64	336(%rdi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm3,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm3,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm3,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm3,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_4_6:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	288(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13

	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_3_6:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	304(%rdi),%ymm15
	vinserti64x2	$2,336(%rdi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_2_6:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%ymm11{%k1}{z}
	vpshufb	%ymm16,%ymm11,%ymm11
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	320(%rdi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm11,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm11,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm11,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm11,%ymm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
.L_AAD_blocks_1_6:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%xmm11{%k1}{z}
	vpshufb	%xmm16,%xmm11,%xmm11
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	336(%rdi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm11,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm11,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm11,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm11,%xmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

.L_CALC_AAD_done_6:
	vmovdqu64	%xmm14,64(%rdi)
	cmpq	$256,%rdx
	jbe	.Lskip_hkeys_cleanup_9
	vpxor	%xmm0,%xmm0,%xmm0
	vmovdqa64	%zmm0,0(%rsp)
	vmovdqa64	%zmm0,64(%rsp)
	vmovdqa64	%zmm0,128(%rsp)
	vmovdqa64	%zmm0,192(%rsp)
	vmovdqa64	%zmm0,256(%rsp)
	vmovdqa64	%zmm0,320(%rsp)
	vmovdqa64	%zmm0,384(%rsp)
	vmovdqa64	%zmm0,448(%rsp)
	vmovdqa64	%zmm0,512(%rsp)
	vmovdqa64	%zmm0,576(%rsp)
	vmovdqa64	%zmm0,640(%rsp)
	vmovdqa64	%zmm0,704(%rsp)
.Lskip_hkeys_cleanup_9:
	vzeroupper
	leaq	(%rbp),%rsp
.cfi_def_cfa_register	%rsp
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
.Lexit_update_aad:
	.byte	0xf3,0xc3
.Lghash_seh_end:
.cfi_endproc	
.size	ossl_aes_gcm_update_aad_avx512, .-ossl_aes_gcm_update_aad_avx512
.globl	ossl_aes_gcm_encrypt_avx512
.type	ossl_aes_gcm_encrypt_avx512,@function
.align	32
ossl_aes_gcm_encrypt_avx512:
.cfi_startproc	
.Lencrypt_seh_begin:
.byte	243,15,30,250
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
.Lencrypt_seh_push_rbx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
.Lencrypt_seh_push_rbp:
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.Lencrypt_seh_push_r12:
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.Lencrypt_seh_push_r13:
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.Lencrypt_seh_push_r14:
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lencrypt_seh_push_r15:










	leaq	0(%rsp),%rbp
.cfi_def_cfa_register	%rbp
.Lencrypt_seh_setfp:

.Lencrypt_seh_prolog_end:
	subq	$1588,%rsp
	andq	$(-64),%rsp


	movl	240(%rdi),%eax
	cmpl	$9,%eax
	je	.Laes_gcm_encrypt_128_avx512
	cmpl	$11,%eax
	je	.Laes_gcm_encrypt_192_avx512
	cmpl	$13,%eax
	je	.Laes_gcm_encrypt_256_avx512
	xorl	%eax,%eax
	jmp	.Lexit_gcm_encrypt
.align	32
.Laes_gcm_encrypt_128_avx512:
	orq	%r8,%r8
	je	.L_enc_dec_done_10
	xorq	%r14,%r14
	vmovdqu64	64(%rsi),%xmm14

	movq	(%rdx),%r11
	orq	%r11,%r11
	je	.L_partial_block_done_11
	movl	$16,%r10d
	leaq	byte_len_to_mask_table(%rip),%r12
	cmpq	%r10,%r8
	cmovcq	%r8,%r10
	kmovw	(%r12,%r10,2),%k1
	vmovdqu8	(%rcx),%xmm0{%k1}{z}

	vmovdqu64	16(%rsi),%xmm3
	vmovdqu64	336(%rsi),%xmm4



	leaq	SHIFT_MASK(%rip),%r12
	addq	%r11,%r12
	vmovdqu64	(%r12),%xmm5
	vpshufb	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm0,%xmm3,%xmm3


	leaq	(%r8,%r11,1),%r13
	subq	$16,%r13
	jge	.L_no_extra_mask_11
	subq	%r13,%r12
.L_no_extra_mask_11:



	vmovdqu64	16(%r12),%xmm0
	vpand	%xmm0,%xmm3,%xmm3
	vpshufb	SHUF_MASK(%rip),%xmm3,%xmm3
	vpshufb	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm14,%xmm14
	cmpq	$0,%r13
	jl	.L_partial_incomplete_11

	vpclmulqdq	$0x11,%xmm4,%xmm14,%xmm7
	vpclmulqdq	$0x00,%xmm4,%xmm14,%xmm10
	vpclmulqdq	$0x01,%xmm4,%xmm14,%xmm11
	vpclmulqdq	$0x10,%xmm4,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm14,%xmm14

	vpsrldq	$8,%xmm14,%xmm11
	vpslldq	$8,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm7,%xmm7
	vpxorq	%xmm10,%xmm14,%xmm14



	vmovdqu64	POLY2(%rip),%xmm11

	vpclmulqdq	$0x01,%xmm14,%xmm11,%xmm10
	vpslldq	$8,%xmm10,%xmm10
	vpxorq	%xmm10,%xmm14,%xmm14



	vpclmulqdq	$0x00,%xmm14,%xmm11,%xmm10
	vpsrldq	$4,%xmm10,%xmm10
	vpclmulqdq	$0x10,%xmm14,%xmm11,%xmm14
	vpslldq	$4,%xmm14,%xmm14

	vpternlogq	$0x96,%xmm10,%xmm7,%xmm14

	movq	$0,(%rdx)

	movq	%r11,%r12
	movq	$16,%r11
	subq	%r12,%r11
	jmp	.L_enc_dec_done_11

.L_partial_incomplete_11:
	addq	%r8,(%rdx)
	movq	%r8,%r11

.L_enc_dec_done_11:


	leaq	byte_len_to_mask_table(%rip),%r12
	kmovw	(%r12,%r11,2),%k1
	vmovdqu64	%xmm14,64(%rsi)

	vpshufb	SHUF_MASK(%rip),%xmm3,%xmm3
	vpshufb	%xmm5,%xmm3,%xmm3
	movq	%r9,%r12
	vmovdqu8	%xmm3,(%r12){%k1}
.L_partial_block_done_11:
	vmovdqu64	0(%rsi),%xmm2
	subq	%r11,%r8
	je	.L_enc_dec_done_10
	cmpq	$256,%r8
	jbe	.L_message_below_equal_16_blocks_10

	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vmovdqa64	ddq_addbe_4444(%rip),%zmm27
	vmovdqa64	ddq_addbe_1234(%rip),%zmm28






	vmovd	%xmm2,%r15d
	andl	$255,%r15d

	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpshufb	%zmm29,%zmm2,%zmm2



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_12
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_12
.L_next_16_overflow_12:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_12:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	0(%rcx,%r11,1),%zmm0
	vmovdqu8	64(%rcx,%r11,1),%zmm3
	vmovdqu8	128(%rcx,%r11,1),%zmm4
	vmovdqu8	192(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,0(%r10,%r11,1)
	vmovdqu8	%zmm10,64(%r10,%r11,1)
	vmovdqu8	%zmm11,128(%r10,%r11,1)
	vmovdqu8	%zmm12,192(%r10,%r11,1)

	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
	vmovdqa64	%zmm7,768(%rsp)
	vmovdqa64	%zmm10,832(%rsp)
	vmovdqa64	%zmm11,896(%rsp)
	vmovdqa64	%zmm12,960(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_13

	vmovdqu64	288(%rsi),%zmm0
	vmovdqu64	%zmm0,704(%rsp)

	vmovdqu64	224(%rsi),%zmm3
	vmovdqu64	%zmm3,640(%rsp)


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	160(%rsi),%zmm4
	vmovdqu64	%zmm4,576(%rsp)

	vmovdqu64	96(%rsi),%zmm5
	vmovdqu64	%zmm5,512(%rsp)
.L_skip_hkeys_precomputation_13:
	cmpq	$512,%r8
	jb	.L_message_below_32_blocks_10



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_14
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_14
.L_next_16_overflow_14:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_14:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	256(%rcx,%r11,1),%zmm0
	vmovdqu8	320(%rcx,%r11,1),%zmm3
	vmovdqu8	384(%rcx,%r11,1),%zmm4
	vmovdqu8	448(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,256(%r10,%r11,1)
	vmovdqu8	%zmm10,320(%r10,%r11,1)
	vmovdqu8	%zmm11,384(%r10,%r11,1)
	vmovdqu8	%zmm12,448(%r10,%r11,1)

	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
	vmovdqa64	%zmm7,1024(%rsp)
	vmovdqa64	%zmm10,1088(%rsp)
	vmovdqa64	%zmm11,1152(%rsp)
	vmovdqa64	%zmm12,1216(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_15
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,192(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,128(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,64(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,0(%rsp)
.L_skip_hkeys_precomputation_15:
	movq	$1,%r14
	addq	$512,%r11
	subq	$512,%r8

	cmpq	$768,%r8
	jb	.L_no_more_big_nblocks_10
.L_encrypt_big_nblocks_10:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_16
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_16
.L_16_blocks_overflow_16:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_16:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_17
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_17
.L_16_blocks_overflow_17:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_17:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_18
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_18
.L_16_blocks_overflow_18:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_18:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	512(%rcx,%r11,1),%zmm17
	vmovdqu8	576(%rcx,%r11,1),%zmm19
	vmovdqu8	640(%rcx,%r11,1),%zmm20
	vmovdqu8	704(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30


	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10

	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpternlogq	$0x96,%zmm15,%zmm12,%zmm6
	vpxorq	%zmm24,%zmm6,%zmm6
	vpternlogq	$0x96,%zmm10,%zmm13,%zmm7
	vpxorq	%zmm25,%zmm7,%zmm7
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vextracti64x4	$1,%zmm6,%ymm12
	vpxorq	%ymm12,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm12
	vpxorq	%xmm12,%xmm6,%xmm6
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm6
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,512(%r10,%r11,1)
	vmovdqu8	%zmm3,576(%r10,%r11,1)
	vmovdqu8	%zmm4,640(%r10,%r11,1)
	vmovdqu8	%zmm5,704(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1024(%rsp)
	vmovdqa64	%zmm3,1088(%rsp)
	vmovdqa64	%zmm4,1152(%rsp)
	vmovdqa64	%zmm5,1216(%rsp)
	vmovdqa64	%zmm6,%zmm14

	addq	$768,%r11
	subq	$768,%r8
	cmpq	$768,%r8
	jae	.L_encrypt_big_nblocks_10

.L_no_more_big_nblocks_10:

	cmpq	$512,%r8
	jae	.L_encrypt_32_blocks_10

	cmpq	$256,%r8
	jae	.L_encrypt_16_blocks_10
.L_encrypt_0_blocks_ghash_32_10:
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$256,%ebx
	subl	%r10d,%ebx
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	addl	$256,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_19

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_19
	jb	.L_last_num_blocks_is_7_1_19


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_19
	jb	.L_last_num_blocks_is_11_9_19


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_19
	ja	.L_last_num_blocks_is_16_19
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_19
	jmp	.L_last_num_blocks_is_13_19

.L_last_num_blocks_is_11_9_19:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_19
	ja	.L_last_num_blocks_is_11_19
	jmp	.L_last_num_blocks_is_9_19

.L_last_num_blocks_is_7_1_19:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_19
	jb	.L_last_num_blocks_is_3_1_19

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_19
	je	.L_last_num_blocks_is_6_19
	jmp	.L_last_num_blocks_is_5_19

.L_last_num_blocks_is_3_1_19:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_19
	je	.L_last_num_blocks_is_2_19
.L_last_num_blocks_is_1_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_20
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_20

.L_16_blocks_overflow_20:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_20:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_21





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_21
.L_small_initial_partial_block_21:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_21
.L_small_initial_compute_done_21:
.L_after_reduction_21:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_2_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_22
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_22

.L_16_blocks_overflow_22:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_22:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_23





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_23
.L_small_initial_partial_block_23:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_23:

	orq	%r8,%r8
	je	.L_after_reduction_23
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_23:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_3_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_24
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_24

.L_16_blocks_overflow_24:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_24:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_25





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_25
.L_small_initial_partial_block_25:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_25:

	orq	%r8,%r8
	je	.L_after_reduction_25
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_25:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_4_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_26
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_26

.L_16_blocks_overflow_26:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_26:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_27





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_27
.L_small_initial_partial_block_27:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_27:

	orq	%r8,%r8
	je	.L_after_reduction_27
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_27:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_5_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_28
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_28

.L_16_blocks_overflow_28:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_28:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_29





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_29
.L_small_initial_partial_block_29:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_29:

	orq	%r8,%r8
	je	.L_after_reduction_29
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_29:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_6_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_30
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_30

.L_16_blocks_overflow_30:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_30:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_31





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_31
.L_small_initial_partial_block_31:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_31:

	orq	%r8,%r8
	je	.L_after_reduction_31
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_31:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_7_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_32
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_32

.L_16_blocks_overflow_32:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_32:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_33





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_33
.L_small_initial_partial_block_33:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_33:

	orq	%r8,%r8
	je	.L_after_reduction_33
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_33:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_8_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_34
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_34

.L_16_blocks_overflow_34:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_34:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_35





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_35
.L_small_initial_partial_block_35:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_35:

	orq	%r8,%r8
	je	.L_after_reduction_35
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_35:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_9_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_36
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_36

.L_16_blocks_overflow_36:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_36:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_37





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_37
.L_small_initial_partial_block_37:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_37:

	orq	%r8,%r8
	je	.L_after_reduction_37
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_37:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_10_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_38
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_38

.L_16_blocks_overflow_38:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_38:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_39





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_39
.L_small_initial_partial_block_39:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_39:

	orq	%r8,%r8
	je	.L_after_reduction_39
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_39:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_11_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_40
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_40

.L_16_blocks_overflow_40:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_40:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_41





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_41
.L_small_initial_partial_block_41:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_41:

	orq	%r8,%r8
	je	.L_after_reduction_41
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_41:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_12_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_42
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_42

.L_16_blocks_overflow_42:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_42:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_43





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_43
.L_small_initial_partial_block_43:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_43:

	orq	%r8,%r8
	je	.L_after_reduction_43
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_43:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_13_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_44
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_44

.L_16_blocks_overflow_44:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_44:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_45





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_45
.L_small_initial_partial_block_45:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_45:

	orq	%r8,%r8
	je	.L_after_reduction_45
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_45:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_14_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_46
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_46

.L_16_blocks_overflow_46:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_46:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_47





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_47
.L_small_initial_partial_block_47:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_47:

	orq	%r8,%r8
	je	.L_after_reduction_47
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_47:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_15_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_48
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_48

.L_16_blocks_overflow_48:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_48:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_49





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_49
.L_small_initial_partial_block_49:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_49:

	orq	%r8,%r8
	je	.L_after_reduction_49
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_49:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_16_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_50
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_50

.L_16_blocks_overflow_50:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_50:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_51:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_51:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_51:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_0_19:
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_19:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_10
.L_encrypt_32_blocks_10:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_52
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_52
.L_16_blocks_overflow_52:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_52:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_53
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_53
.L_16_blocks_overflow_53:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_53:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

	subq	$512,%r8
	addq	$512,%r11
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_54

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_54
	jb	.L_last_num_blocks_is_7_1_54


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_54
	jb	.L_last_num_blocks_is_11_9_54


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_54
	ja	.L_last_num_blocks_is_16_54
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_54
	jmp	.L_last_num_blocks_is_13_54

.L_last_num_blocks_is_11_9_54:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_54
	ja	.L_last_num_blocks_is_11_54
	jmp	.L_last_num_blocks_is_9_54

.L_last_num_blocks_is_7_1_54:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_54
	jb	.L_last_num_blocks_is_3_1_54

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_54
	je	.L_last_num_blocks_is_6_54
	jmp	.L_last_num_blocks_is_5_54

.L_last_num_blocks_is_3_1_54:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_54
	je	.L_last_num_blocks_is_2_54
.L_last_num_blocks_is_1_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_55
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_55

.L_16_blocks_overflow_55:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_55:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_56





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_56
.L_small_initial_partial_block_56:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_56
.L_small_initial_compute_done_56:
.L_after_reduction_56:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_2_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_57
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_57

.L_16_blocks_overflow_57:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_57:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_58





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_58
.L_small_initial_partial_block_58:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_58:

	orq	%r8,%r8
	je	.L_after_reduction_58
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_58:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_3_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_59
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_59

.L_16_blocks_overflow_59:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_59:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_60





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_60
.L_small_initial_partial_block_60:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_60:

	orq	%r8,%r8
	je	.L_after_reduction_60
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_60:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_4_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_61
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_61

.L_16_blocks_overflow_61:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_61:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_62





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_62
.L_small_initial_partial_block_62:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_62:

	orq	%r8,%r8
	je	.L_after_reduction_62
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_62:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_5_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_63
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_63

.L_16_blocks_overflow_63:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_63:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_64





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_64
.L_small_initial_partial_block_64:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_64:

	orq	%r8,%r8
	je	.L_after_reduction_64
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_64:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_6_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_65
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_65

.L_16_blocks_overflow_65:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_65:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_66





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_66
.L_small_initial_partial_block_66:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_66:

	orq	%r8,%r8
	je	.L_after_reduction_66
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_66:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_7_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_67
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_67

.L_16_blocks_overflow_67:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_67:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_68





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_68
.L_small_initial_partial_block_68:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_68:

	orq	%r8,%r8
	je	.L_after_reduction_68
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_68:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_8_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_69
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_69

.L_16_blocks_overflow_69:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_69:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_70





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_70
.L_small_initial_partial_block_70:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_70:

	orq	%r8,%r8
	je	.L_after_reduction_70
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_70:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_9_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_71
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_71

.L_16_blocks_overflow_71:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_71:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_72





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_72
.L_small_initial_partial_block_72:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_72:

	orq	%r8,%r8
	je	.L_after_reduction_72
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_72:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_10_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_73
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_73

.L_16_blocks_overflow_73:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_73:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_74





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_74
.L_small_initial_partial_block_74:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_74:

	orq	%r8,%r8
	je	.L_after_reduction_74
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_74:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_11_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_75
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_75

.L_16_blocks_overflow_75:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_75:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_76





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_76
.L_small_initial_partial_block_76:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_76:

	orq	%r8,%r8
	je	.L_after_reduction_76
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_76:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_12_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_77
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_77

.L_16_blocks_overflow_77:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_77:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_78





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_78
.L_small_initial_partial_block_78:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_78:

	orq	%r8,%r8
	je	.L_after_reduction_78
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_78:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_13_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_79
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_79

.L_16_blocks_overflow_79:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_79:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_80





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_80
.L_small_initial_partial_block_80:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_80:

	orq	%r8,%r8
	je	.L_after_reduction_80
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_80:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_14_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_81
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_81

.L_16_blocks_overflow_81:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_81:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_82





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_82
.L_small_initial_partial_block_82:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_82:

	orq	%r8,%r8
	je	.L_after_reduction_82
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_82:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_15_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_83
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_83

.L_16_blocks_overflow_83:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_83:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_84





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_84
.L_small_initial_partial_block_84:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_84:

	orq	%r8,%r8
	je	.L_after_reduction_84
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_84:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_16_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_85
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_85

.L_16_blocks_overflow_85:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_85:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_86:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_86:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_86:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_0_54:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_54:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_10
.L_encrypt_16_blocks_10:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_87
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_87
.L_16_blocks_overflow_87:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_87:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	256(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	320(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	384(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	448(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_88

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_88
	jb	.L_last_num_blocks_is_7_1_88


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_88
	jb	.L_last_num_blocks_is_11_9_88


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_88
	ja	.L_last_num_blocks_is_16_88
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_88
	jmp	.L_last_num_blocks_is_13_88

.L_last_num_blocks_is_11_9_88:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_88
	ja	.L_last_num_blocks_is_11_88
	jmp	.L_last_num_blocks_is_9_88

.L_last_num_blocks_is_7_1_88:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_88
	jb	.L_last_num_blocks_is_3_1_88

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_88
	je	.L_last_num_blocks_is_6_88
	jmp	.L_last_num_blocks_is_5_88

.L_last_num_blocks_is_3_1_88:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_88
	je	.L_last_num_blocks_is_2_88
.L_last_num_blocks_is_1_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_89
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_89

.L_16_blocks_overflow_89:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_89:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%xmm31,%xmm0,%xmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_90





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_90
.L_small_initial_partial_block_90:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)











	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_90
.L_small_initial_compute_done_90:
.L_after_reduction_90:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_2_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_91
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_91

.L_16_blocks_overflow_91:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_91:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%ymm31,%ymm0,%ymm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_92





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_92
.L_small_initial_partial_block_92:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_92:

	orq	%r8,%r8
	je	.L_after_reduction_92
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_92:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_3_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_93
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_93

.L_16_blocks_overflow_93:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_93:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_94





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_94
.L_small_initial_partial_block_94:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_94:

	orq	%r8,%r8
	je	.L_after_reduction_94
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_94:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_4_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_95
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_95

.L_16_blocks_overflow_95:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_95:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_96





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_96
.L_small_initial_partial_block_96:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_96:

	orq	%r8,%r8
	je	.L_after_reduction_96
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_96:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_5_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_97
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_97

.L_16_blocks_overflow_97:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_97:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_98





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_98
.L_small_initial_partial_block_98:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_98:

	orq	%r8,%r8
	je	.L_after_reduction_98
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_98:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_6_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_99
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_99

.L_16_blocks_overflow_99:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_99:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_100





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_100
.L_small_initial_partial_block_100:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_100:

	orq	%r8,%r8
	je	.L_after_reduction_100
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_100:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_7_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_101
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_101

.L_16_blocks_overflow_101:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_101:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_102





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_102
.L_small_initial_partial_block_102:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_102:

	orq	%r8,%r8
	je	.L_after_reduction_102
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_102:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_8_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_103
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_103

.L_16_blocks_overflow_103:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_103:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_104





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_104
.L_small_initial_partial_block_104:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_104:

	orq	%r8,%r8
	je	.L_after_reduction_104
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_104:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_9_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_105
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_105

.L_16_blocks_overflow_105:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_105:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_106





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_106
.L_small_initial_partial_block_106:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_106:

	orq	%r8,%r8
	je	.L_after_reduction_106
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_106:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_10_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_107
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_107

.L_16_blocks_overflow_107:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_107:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_108





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_108
.L_small_initial_partial_block_108:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_108:

	orq	%r8,%r8
	je	.L_after_reduction_108
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_108:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_11_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_109
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_109

.L_16_blocks_overflow_109:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_109:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_110





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_110
.L_small_initial_partial_block_110:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_110:

	orq	%r8,%r8
	je	.L_after_reduction_110
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_110:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_12_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_111
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_111

.L_16_blocks_overflow_111:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_111:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_112





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_112
.L_small_initial_partial_block_112:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_112:

	orq	%r8,%r8
	je	.L_after_reduction_112
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_112:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_13_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_113
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_113

.L_16_blocks_overflow_113:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_113:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_114





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_114
.L_small_initial_partial_block_114:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_114:

	orq	%r8,%r8
	je	.L_after_reduction_114
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_114:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_14_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_115
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_115

.L_16_blocks_overflow_115:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_115:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_116





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_116
.L_small_initial_partial_block_116:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_116:

	orq	%r8,%r8
	je	.L_after_reduction_116
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_116:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_15_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_117
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_117

.L_16_blocks_overflow_117:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_117:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_118





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_118
.L_small_initial_partial_block_118:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_118:

	orq	%r8,%r8
	je	.L_after_reduction_118
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_118:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_16_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_119
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_119

.L_16_blocks_overflow_119:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_119:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_120:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_120:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_120:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_0_88:
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_88:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_10

.L_message_below_32_blocks_10:


	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_121
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)
.L_skip_hkeys_precomputation_121:
	movq	$1,%r14
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_122

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_122
	jb	.L_last_num_blocks_is_7_1_122


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_122
	jb	.L_last_num_blocks_is_11_9_122


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_122
	ja	.L_last_num_blocks_is_16_122
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_122
	jmp	.L_last_num_blocks_is_13_122

.L_last_num_blocks_is_11_9_122:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_122
	ja	.L_last_num_blocks_is_11_122
	jmp	.L_last_num_blocks_is_9_122

.L_last_num_blocks_is_7_1_122:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_122
	jb	.L_last_num_blocks_is_3_1_122

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_122
	je	.L_last_num_blocks_is_6_122
	jmp	.L_last_num_blocks_is_5_122

.L_last_num_blocks_is_3_1_122:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_122
	je	.L_last_num_blocks_is_2_122
.L_last_num_blocks_is_1_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_123
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_123

.L_16_blocks_overflow_123:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_123:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_124





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_124
.L_small_initial_partial_block_124:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_124
.L_small_initial_compute_done_124:
.L_after_reduction_124:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_2_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_125
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_125

.L_16_blocks_overflow_125:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_125:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_126





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_126
.L_small_initial_partial_block_126:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_126:

	orq	%r8,%r8
	je	.L_after_reduction_126
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_126:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_3_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_127
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_127

.L_16_blocks_overflow_127:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_127:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_128





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_128
.L_small_initial_partial_block_128:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_128:

	orq	%r8,%r8
	je	.L_after_reduction_128
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_128:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_4_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_129
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_129

.L_16_blocks_overflow_129:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_129:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_130





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_130
.L_small_initial_partial_block_130:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_130:

	orq	%r8,%r8
	je	.L_after_reduction_130
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_130:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_5_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_131
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_131

.L_16_blocks_overflow_131:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_131:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_132





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_132
.L_small_initial_partial_block_132:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_132:

	orq	%r8,%r8
	je	.L_after_reduction_132
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_132:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_6_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_133
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_133

.L_16_blocks_overflow_133:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_133:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_134





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_134
.L_small_initial_partial_block_134:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_134:

	orq	%r8,%r8
	je	.L_after_reduction_134
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_134:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_7_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_135
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_135

.L_16_blocks_overflow_135:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_135:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_136





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_136
.L_small_initial_partial_block_136:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_136:

	orq	%r8,%r8
	je	.L_after_reduction_136
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_136:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_8_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_137
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_137

.L_16_blocks_overflow_137:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_137:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_138





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_138
.L_small_initial_partial_block_138:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_138:

	orq	%r8,%r8
	je	.L_after_reduction_138
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_138:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_9_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_139
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_139

.L_16_blocks_overflow_139:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_139:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_140





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_140
.L_small_initial_partial_block_140:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_140:

	orq	%r8,%r8
	je	.L_after_reduction_140
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_140:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_10_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_141
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_141

.L_16_blocks_overflow_141:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_141:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_142





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_142
.L_small_initial_partial_block_142:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_142:

	orq	%r8,%r8
	je	.L_after_reduction_142
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_142:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_11_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_143
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_143

.L_16_blocks_overflow_143:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_143:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_144





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_144
.L_small_initial_partial_block_144:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_144:

	orq	%r8,%r8
	je	.L_after_reduction_144
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_144:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_12_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_145
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_145

.L_16_blocks_overflow_145:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_145:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_146





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_146
.L_small_initial_partial_block_146:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_146:

	orq	%r8,%r8
	je	.L_after_reduction_146
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_146:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_13_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_147
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_147

.L_16_blocks_overflow_147:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_147:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_148





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_148
.L_small_initial_partial_block_148:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_148:

	orq	%r8,%r8
	je	.L_after_reduction_148
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_148:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_14_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_149
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_149

.L_16_blocks_overflow_149:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_149:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_150





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_150
.L_small_initial_partial_block_150:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_150:

	orq	%r8,%r8
	je	.L_after_reduction_150
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_150:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_15_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_151
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_151

.L_16_blocks_overflow_151:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_151:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_152





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_152
.L_small_initial_partial_block_152:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_152:

	orq	%r8,%r8
	je	.L_after_reduction_152
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_152:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_16_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_153
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_153

.L_16_blocks_overflow_153:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_153:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_154:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_154:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_154:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_0_122:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_122:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_10

.L_message_below_equal_16_blocks_10:


	movl	%r8d,%r12d
	addl	$15,%r12d
	shrl	$4,%r12d
	cmpq	$8,%r12
	je	.L_small_initial_num_blocks_is_8_155
	jl	.L_small_initial_num_blocks_is_7_1_155


	cmpq	$12,%r12
	je	.L_small_initial_num_blocks_is_12_155
	jl	.L_small_initial_num_blocks_is_11_9_155


	cmpq	$16,%r12
	je	.L_small_initial_num_blocks_is_16_155
	cmpq	$15,%r12
	je	.L_small_initial_num_blocks_is_15_155
	cmpq	$14,%r12
	je	.L_small_initial_num_blocks_is_14_155
	jmp	.L_small_initial_num_blocks_is_13_155

.L_small_initial_num_blocks_is_11_9_155:

	cmpq	$11,%r12
	je	.L_small_initial_num_blocks_is_11_155
	cmpq	$10,%r12
	je	.L_small_initial_num_blocks_is_10_155
	jmp	.L_small_initial_num_blocks_is_9_155

.L_small_initial_num_blocks_is_7_1_155:
	cmpq	$4,%r12
	je	.L_small_initial_num_blocks_is_4_155
	jl	.L_small_initial_num_blocks_is_3_1_155

	cmpq	$7,%r12
	je	.L_small_initial_num_blocks_is_7_155
	cmpq	$6,%r12
	je	.L_small_initial_num_blocks_is_6_155
	jmp	.L_small_initial_num_blocks_is_5_155

.L_small_initial_num_blocks_is_3_1_155:

	cmpq	$3,%r12
	je	.L_small_initial_num_blocks_is_3_155
	cmpq	$2,%r12
	je	.L_small_initial_num_blocks_is_2_155





.L_small_initial_num_blocks_is_1_155:
	vmovdqa64	SHUF_MASK(%rip),%xmm29
	vpaddd	ONE(%rip),%xmm2,%xmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm0,%xmm2
	vpshufb	%xmm29,%xmm0,%xmm0
	vmovdqu8	0(%rcx,%r11,1),%xmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%xmm15,%xmm0,%xmm0
	vpxorq	%xmm6,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm6
	vextracti32x4	$0,%zmm6,%xmm13


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_156





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_156
.L_small_initial_partial_block_156:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)











	vpxorq	%xmm13,%xmm14,%xmm14

	jmp	.L_after_reduction_156
.L_small_initial_compute_done_156:
.L_after_reduction_156:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_2_155:
	vmovdqa64	SHUF_MASK(%rip),%ymm29
	vshufi64x2	$0,%ymm2,%ymm2,%ymm0
	vpaddd	ddq_add_1234(%rip),%ymm0,%ymm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm0,%xmm2
	vpshufb	%ymm29,%ymm0,%ymm0
	vmovdqu8	0(%rcx,%r11,1),%ymm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%ymm15,%ymm0,%ymm0
	vpxorq	%ymm6,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm6
	vextracti32x4	$1,%zmm6,%xmm13
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_157





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_157
.L_small_initial_partial_block_157:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_157:

	orq	%r8,%r8
	je	.L_after_reduction_157
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_157:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_3_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vextracti32x4	$2,%zmm6,%xmm13
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_158





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_158
.L_small_initial_partial_block_158:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_158:

	orq	%r8,%r8
	je	.L_after_reduction_158
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_158:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_4_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vextracti32x4	$3,%zmm6,%xmm13
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_159





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_159
.L_small_initial_partial_block_159:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_159:

	orq	%r8,%r8
	je	.L_after_reduction_159
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_159:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_5_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%xmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%xmm15,%xmm3,%xmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%xmm7,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%xmm29,%xmm3,%xmm7
	vextracti32x4	$0,%zmm7,%xmm13
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_160





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_160
.L_small_initial_partial_block_160:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_160:

	orq	%r8,%r8
	je	.L_after_reduction_160
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_160:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_6_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%ymm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%ymm15,%ymm3,%ymm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%ymm7,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%ymm29,%ymm3,%ymm7
	vextracti32x4	$1,%zmm7,%xmm13
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_161





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_161
.L_small_initial_partial_block_161:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_161:

	orq	%r8,%r8
	je	.L_after_reduction_161
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_161:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_7_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vextracti32x4	$2,%zmm7,%xmm13
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_162





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_162
.L_small_initial_partial_block_162:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_162:

	orq	%r8,%r8
	je	.L_after_reduction_162
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_162:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_8_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vextracti32x4	$3,%zmm7,%xmm13
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_163





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_163
.L_small_initial_partial_block_163:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_163:

	orq	%r8,%r8
	je	.L_after_reduction_163
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_163:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_9_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%xmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%xmm15,%xmm4,%xmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%xmm10,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%xmm29,%xmm4,%xmm10
	vextracti32x4	$0,%zmm10,%xmm13
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_164





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_164
.L_small_initial_partial_block_164:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_164:

	orq	%r8,%r8
	je	.L_after_reduction_164
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_164:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_10_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%ymm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%ymm15,%ymm4,%ymm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%ymm10,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%ymm29,%ymm4,%ymm10
	vextracti32x4	$1,%zmm10,%xmm13
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_165





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_165
.L_small_initial_partial_block_165:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_165:

	orq	%r8,%r8
	je	.L_after_reduction_165
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_165:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_11_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vextracti32x4	$2,%zmm10,%xmm13
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_166





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_166
.L_small_initial_partial_block_166:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_166:

	orq	%r8,%r8
	je	.L_after_reduction_166
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_166:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_12_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vextracti32x4	$3,%zmm10,%xmm13
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_167





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_167
.L_small_initial_partial_block_167:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_167:

	orq	%r8,%r8
	je	.L_after_reduction_167
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_167:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_13_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%xmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%xmm15,%xmm5,%xmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%xmm11,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%xmm29,%xmm5,%xmm11
	vextracti32x4	$0,%zmm11,%xmm13
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_168





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_168
.L_small_initial_partial_block_168:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_168:

	orq	%r8,%r8
	je	.L_after_reduction_168
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_168:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_14_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%ymm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%ymm15,%ymm5,%ymm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%ymm11,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%ymm29,%ymm5,%ymm11
	vextracti32x4	$1,%zmm11,%xmm13
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_169





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_169
.L_small_initial_partial_block_169:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_169:

	orq	%r8,%r8
	je	.L_after_reduction_169
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_169:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_15_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%zmm29,%zmm5,%zmm11
	vextracti32x4	$2,%zmm11,%xmm13
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_170





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_170
.L_small_initial_partial_block_170:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_170:

	orq	%r8,%r8
	je	.L_after_reduction_170
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_170:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_16_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%zmm29,%zmm5,%zmm11
	vextracti32x4	$3,%zmm11,%xmm13
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_171:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_171:
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_171:
.L_small_initial_blocks_encrypted_155:
.L_ghash_done_10:
	vmovdqu64	%xmm2,0(%rsi)
	vmovdqu64	%xmm14,64(%rsi)
.L_enc_dec_done_10:
	jmp	.Lexit_gcm_encrypt
.align	32
.Laes_gcm_encrypt_192_avx512:
	orq	%r8,%r8
	je	.L_enc_dec_done_172
	xorq	%r14,%r14
	vmovdqu64	64(%rsi),%xmm14

	movq	(%rdx),%r11
	orq	%r11,%r11
	je	.L_partial_block_done_173
	movl	$16,%r10d
	leaq	byte_len_to_mask_table(%rip),%r12
	cmpq	%r10,%r8
	cmovcq	%r8,%r10
	kmovw	(%r12,%r10,2),%k1
	vmovdqu8	(%rcx),%xmm0{%k1}{z}

	vmovdqu64	16(%rsi),%xmm3
	vmovdqu64	336(%rsi),%xmm4



	leaq	SHIFT_MASK(%rip),%r12
	addq	%r11,%r12
	vmovdqu64	(%r12),%xmm5
	vpshufb	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm0,%xmm3,%xmm3


	leaq	(%r8,%r11,1),%r13
	subq	$16,%r13
	jge	.L_no_extra_mask_173
	subq	%r13,%r12
.L_no_extra_mask_173:



	vmovdqu64	16(%r12),%xmm0
	vpand	%xmm0,%xmm3,%xmm3
	vpshufb	SHUF_MASK(%rip),%xmm3,%xmm3
	vpshufb	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm14,%xmm14
	cmpq	$0,%r13
	jl	.L_partial_incomplete_173

	vpclmulqdq	$0x11,%xmm4,%xmm14,%xmm7
	vpclmulqdq	$0x00,%xmm4,%xmm14,%xmm10
	vpclmulqdq	$0x01,%xmm4,%xmm14,%xmm11
	vpclmulqdq	$0x10,%xmm4,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm14,%xmm14

	vpsrldq	$8,%xmm14,%xmm11
	vpslldq	$8,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm7,%xmm7
	vpxorq	%xmm10,%xmm14,%xmm14



	vmovdqu64	POLY2(%rip),%xmm11

	vpclmulqdq	$0x01,%xmm14,%xmm11,%xmm10
	vpslldq	$8,%xmm10,%xmm10
	vpxorq	%xmm10,%xmm14,%xmm14



	vpclmulqdq	$0x00,%xmm14,%xmm11,%xmm10
	vpsrldq	$4,%xmm10,%xmm10
	vpclmulqdq	$0x10,%xmm14,%xmm11,%xmm14
	vpslldq	$4,%xmm14,%xmm14

	vpternlogq	$0x96,%xmm10,%xmm7,%xmm14

	movq	$0,(%rdx)

	movq	%r11,%r12
	movq	$16,%r11
	subq	%r12,%r11
	jmp	.L_enc_dec_done_173

.L_partial_incomplete_173:
	addq	%r8,(%rdx)
	movq	%r8,%r11

.L_enc_dec_done_173:


	leaq	byte_len_to_mask_table(%rip),%r12
	kmovw	(%r12,%r11,2),%k1
	vmovdqu64	%xmm14,64(%rsi)

	vpshufb	SHUF_MASK(%rip),%xmm3,%xmm3
	vpshufb	%xmm5,%xmm3,%xmm3
	movq	%r9,%r12
	vmovdqu8	%xmm3,(%r12){%k1}
.L_partial_block_done_173:
	vmovdqu64	0(%rsi),%xmm2
	subq	%r11,%r8
	je	.L_enc_dec_done_172
	cmpq	$256,%r8
	jbe	.L_message_below_equal_16_blocks_172

	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vmovdqa64	ddq_addbe_4444(%rip),%zmm27
	vmovdqa64	ddq_addbe_1234(%rip),%zmm28






	vmovd	%xmm2,%r15d
	andl	$255,%r15d

	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpshufb	%zmm29,%zmm2,%zmm2



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_174
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_174
.L_next_16_overflow_174:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_174:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	0(%rcx,%r11,1),%zmm0
	vmovdqu8	64(%rcx,%r11,1),%zmm3
	vmovdqu8	128(%rcx,%r11,1),%zmm4
	vmovdqu8	192(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	176(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	192(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,0(%r10,%r11,1)
	vmovdqu8	%zmm10,64(%r10,%r11,1)
	vmovdqu8	%zmm11,128(%r10,%r11,1)
	vmovdqu8	%zmm12,192(%r10,%r11,1)

	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
	vmovdqa64	%zmm7,768(%rsp)
	vmovdqa64	%zmm10,832(%rsp)
	vmovdqa64	%zmm11,896(%rsp)
	vmovdqa64	%zmm12,960(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_175

	vmovdqu64	288(%rsi),%zmm0
	vmovdqu64	%zmm0,704(%rsp)

	vmovdqu64	224(%rsi),%zmm3
	vmovdqu64	%zmm3,640(%rsp)


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	160(%rsi),%zmm4
	vmovdqu64	%zmm4,576(%rsp)

	vmovdqu64	96(%rsi),%zmm5
	vmovdqu64	%zmm5,512(%rsp)
.L_skip_hkeys_precomputation_175:
	cmpq	$512,%r8
	jb	.L_message_below_32_blocks_172



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_176
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_176
.L_next_16_overflow_176:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_176:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	256(%rcx,%r11,1),%zmm0
	vmovdqu8	320(%rcx,%r11,1),%zmm3
	vmovdqu8	384(%rcx,%r11,1),%zmm4
	vmovdqu8	448(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	176(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	192(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,256(%r10,%r11,1)
	vmovdqu8	%zmm10,320(%r10,%r11,1)
	vmovdqu8	%zmm11,384(%r10,%r11,1)
	vmovdqu8	%zmm12,448(%r10,%r11,1)

	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
	vmovdqa64	%zmm7,1024(%rsp)
	vmovdqa64	%zmm10,1088(%rsp)
	vmovdqa64	%zmm11,1152(%rsp)
	vmovdqa64	%zmm12,1216(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_177
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,192(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,128(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,64(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,0(%rsp)
.L_skip_hkeys_precomputation_177:
	movq	$1,%r14
	addq	$512,%r11
	subq	$512,%r8

	cmpq	$768,%r8
	jb	.L_no_more_big_nblocks_172
.L_encrypt_big_nblocks_172:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_178
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_178
.L_16_blocks_overflow_178:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_178:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_179
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_179
.L_16_blocks_overflow_179:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_179:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_180
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_180
.L_16_blocks_overflow_180:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_180:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	512(%rcx,%r11,1),%zmm17
	vmovdqu8	576(%rcx,%r11,1),%zmm19
	vmovdqu8	640(%rcx,%r11,1),%zmm20
	vmovdqu8	704(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30


	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10

	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpternlogq	$0x96,%zmm15,%zmm12,%zmm6
	vpxorq	%zmm24,%zmm6,%zmm6
	vpternlogq	$0x96,%zmm10,%zmm13,%zmm7
	vpxorq	%zmm25,%zmm7,%zmm7
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vextracti64x4	$1,%zmm6,%ymm12
	vpxorq	%ymm12,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm12
	vpxorq	%xmm12,%xmm6,%xmm6
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm6
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,512(%r10,%r11,1)
	vmovdqu8	%zmm3,576(%r10,%r11,1)
	vmovdqu8	%zmm4,640(%r10,%r11,1)
	vmovdqu8	%zmm5,704(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1024(%rsp)
	vmovdqa64	%zmm3,1088(%rsp)
	vmovdqa64	%zmm4,1152(%rsp)
	vmovdqa64	%zmm5,1216(%rsp)
	vmovdqa64	%zmm6,%zmm14

	addq	$768,%r11
	subq	$768,%r8
	cmpq	$768,%r8
	jae	.L_encrypt_big_nblocks_172

.L_no_more_big_nblocks_172:

	cmpq	$512,%r8
	jae	.L_encrypt_32_blocks_172

	cmpq	$256,%r8
	jae	.L_encrypt_16_blocks_172
.L_encrypt_0_blocks_ghash_32_172:
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$256,%ebx
	subl	%r10d,%ebx
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	addl	$256,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_181

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_181
	jb	.L_last_num_blocks_is_7_1_181


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_181
	jb	.L_last_num_blocks_is_11_9_181


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_181
	ja	.L_last_num_blocks_is_16_181
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_181
	jmp	.L_last_num_blocks_is_13_181

.L_last_num_blocks_is_11_9_181:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_181
	ja	.L_last_num_blocks_is_11_181
	jmp	.L_last_num_blocks_is_9_181

.L_last_num_blocks_is_7_1_181:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_181
	jb	.L_last_num_blocks_is_3_1_181

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_181
	je	.L_last_num_blocks_is_6_181
	jmp	.L_last_num_blocks_is_5_181

.L_last_num_blocks_is_3_1_181:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_181
	je	.L_last_num_blocks_is_2_181
.L_last_num_blocks_is_1_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_182
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_182

.L_16_blocks_overflow_182:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_182:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_183





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_183
.L_small_initial_partial_block_183:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_183
.L_small_initial_compute_done_183:
.L_after_reduction_183:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_2_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_184
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_184

.L_16_blocks_overflow_184:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_184:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_185





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_185
.L_small_initial_partial_block_185:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_185:

	orq	%r8,%r8
	je	.L_after_reduction_185
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_185:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_3_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_186
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_186

.L_16_blocks_overflow_186:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_186:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_187





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_187
.L_small_initial_partial_block_187:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_187:

	orq	%r8,%r8
	je	.L_after_reduction_187
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_187:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_4_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_188
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_188

.L_16_blocks_overflow_188:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_188:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_189





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_189
.L_small_initial_partial_block_189:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_189:

	orq	%r8,%r8
	je	.L_after_reduction_189
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_189:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_5_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_190
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_190

.L_16_blocks_overflow_190:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_190:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_191





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_191
.L_small_initial_partial_block_191:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_191:

	orq	%r8,%r8
	je	.L_after_reduction_191
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_191:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_6_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_192
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_192

.L_16_blocks_overflow_192:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_192:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_193





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_193
.L_small_initial_partial_block_193:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_193:

	orq	%r8,%r8
	je	.L_after_reduction_193
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_193:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_7_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_194
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_194

.L_16_blocks_overflow_194:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_194:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_195





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_195
.L_small_initial_partial_block_195:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_195:

	orq	%r8,%r8
	je	.L_after_reduction_195
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_195:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_8_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_196
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_196

.L_16_blocks_overflow_196:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_196:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_197





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_197
.L_small_initial_partial_block_197:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_197:

	orq	%r8,%r8
	je	.L_after_reduction_197
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_197:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_9_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_198
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_198

.L_16_blocks_overflow_198:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_198:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_199





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_199
.L_small_initial_partial_block_199:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_199:

	orq	%r8,%r8
	je	.L_after_reduction_199
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_199:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_10_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_200
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_200

.L_16_blocks_overflow_200:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_200:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_201





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_201
.L_small_initial_partial_block_201:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_201:

	orq	%r8,%r8
	je	.L_after_reduction_201
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_201:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_11_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_202
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_202

.L_16_blocks_overflow_202:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_202:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_203





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_203
.L_small_initial_partial_block_203:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_203:

	orq	%r8,%r8
	je	.L_after_reduction_203
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_203:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_12_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_204
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_204

.L_16_blocks_overflow_204:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_204:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_205





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_205
.L_small_initial_partial_block_205:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_205:

	orq	%r8,%r8
	je	.L_after_reduction_205
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_205:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_13_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_206
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_206

.L_16_blocks_overflow_206:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_206:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_207





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_207
.L_small_initial_partial_block_207:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_207:

	orq	%r8,%r8
	je	.L_after_reduction_207
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_207:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_14_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_208
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_208

.L_16_blocks_overflow_208:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_208:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_209





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_209
.L_small_initial_partial_block_209:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_209:

	orq	%r8,%r8
	je	.L_after_reduction_209
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_209:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_15_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_210
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_210

.L_16_blocks_overflow_210:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_210:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_211





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_211
.L_small_initial_partial_block_211:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_211:

	orq	%r8,%r8
	je	.L_after_reduction_211
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_211:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_16_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_212
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_212

.L_16_blocks_overflow_212:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_212:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_213:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_213:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_213:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_0_181:
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_181:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_172
.L_encrypt_32_blocks_172:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_214
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_214
.L_16_blocks_overflow_214:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_214:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_215
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_215
.L_16_blocks_overflow_215:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_215:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

	subq	$512,%r8
	addq	$512,%r11
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_216

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_216
	jb	.L_last_num_blocks_is_7_1_216


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_216
	jb	.L_last_num_blocks_is_11_9_216


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_216
	ja	.L_last_num_blocks_is_16_216
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_216
	jmp	.L_last_num_blocks_is_13_216

.L_last_num_blocks_is_11_9_216:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_216
	ja	.L_last_num_blocks_is_11_216
	jmp	.L_last_num_blocks_is_9_216

.L_last_num_blocks_is_7_1_216:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_216
	jb	.L_last_num_blocks_is_3_1_216

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_216
	je	.L_last_num_blocks_is_6_216
	jmp	.L_last_num_blocks_is_5_216

.L_last_num_blocks_is_3_1_216:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_216
	je	.L_last_num_blocks_is_2_216
.L_last_num_blocks_is_1_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_217
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_217

.L_16_blocks_overflow_217:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_217:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_218





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_218
.L_small_initial_partial_block_218:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_218
.L_small_initial_compute_done_218:
.L_after_reduction_218:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_2_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_219
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_219

.L_16_blocks_overflow_219:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_219:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_220





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_220
.L_small_initial_partial_block_220:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_220:

	orq	%r8,%r8
	je	.L_after_reduction_220
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_220:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_3_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_221
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_221

.L_16_blocks_overflow_221:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_221:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_222





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_222
.L_small_initial_partial_block_222:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_222:

	orq	%r8,%r8
	je	.L_after_reduction_222
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_222:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_4_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_223
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_223

.L_16_blocks_overflow_223:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_223:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_224





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_224
.L_small_initial_partial_block_224:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_224:

	orq	%r8,%r8
	je	.L_after_reduction_224
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_224:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_5_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_225
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_225

.L_16_blocks_overflow_225:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_225:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_226





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_226
.L_small_initial_partial_block_226:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_226:

	orq	%r8,%r8
	je	.L_after_reduction_226
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_226:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_6_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_227
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_227

.L_16_blocks_overflow_227:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_227:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_228





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_228
.L_small_initial_partial_block_228:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_228:

	orq	%r8,%r8
	je	.L_after_reduction_228
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_228:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_7_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_229
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_229

.L_16_blocks_overflow_229:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_229:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_230





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_230
.L_small_initial_partial_block_230:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_230:

	orq	%r8,%r8
	je	.L_after_reduction_230
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_230:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_8_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_231
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_231

.L_16_blocks_overflow_231:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_231:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_232





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_232
.L_small_initial_partial_block_232:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_232:

	orq	%r8,%r8
	je	.L_after_reduction_232
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_232:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_9_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_233
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_233

.L_16_blocks_overflow_233:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_233:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_234





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_234
.L_small_initial_partial_block_234:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_234:

	orq	%r8,%r8
	je	.L_after_reduction_234
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_234:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_10_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_235
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_235

.L_16_blocks_overflow_235:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_235:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_236





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_236
.L_small_initial_partial_block_236:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_236:

	orq	%r8,%r8
	je	.L_after_reduction_236
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_236:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_11_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_237
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_237

.L_16_blocks_overflow_237:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_237:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_238





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_238
.L_small_initial_partial_block_238:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_238:

	orq	%r8,%r8
	je	.L_after_reduction_238
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_238:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_12_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_239
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_239

.L_16_blocks_overflow_239:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_239:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_240





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_240
.L_small_initial_partial_block_240:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_240:

	orq	%r8,%r8
	je	.L_after_reduction_240
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_240:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_13_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_241
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_241

.L_16_blocks_overflow_241:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_241:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_242





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_242
.L_small_initial_partial_block_242:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_242:

	orq	%r8,%r8
	je	.L_after_reduction_242
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_242:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_14_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_243
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_243

.L_16_blocks_overflow_243:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_243:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_244





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_244
.L_small_initial_partial_block_244:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_244:

	orq	%r8,%r8
	je	.L_after_reduction_244
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_244:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_15_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_245
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_245

.L_16_blocks_overflow_245:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_245:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_246





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_246
.L_small_initial_partial_block_246:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_246:

	orq	%r8,%r8
	je	.L_after_reduction_246
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_246:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_16_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_247
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_247

.L_16_blocks_overflow_247:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_247:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_248:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_248:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_248:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_0_216:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_216:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_172
.L_encrypt_16_blocks_172:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_249
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_249
.L_16_blocks_overflow_249:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_249:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	256(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	320(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	384(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	448(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_250

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_250
	jb	.L_last_num_blocks_is_7_1_250


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_250
	jb	.L_last_num_blocks_is_11_9_250


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_250
	ja	.L_last_num_blocks_is_16_250
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_250
	jmp	.L_last_num_blocks_is_13_250

.L_last_num_blocks_is_11_9_250:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_250
	ja	.L_last_num_blocks_is_11_250
	jmp	.L_last_num_blocks_is_9_250

.L_last_num_blocks_is_7_1_250:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_250
	jb	.L_last_num_blocks_is_3_1_250

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_250
	je	.L_last_num_blocks_is_6_250
	jmp	.L_last_num_blocks_is_5_250

.L_last_num_blocks_is_3_1_250:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_250
	je	.L_last_num_blocks_is_2_250
.L_last_num_blocks_is_1_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_251
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_251

.L_16_blocks_overflow_251:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_251:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%xmm31,%xmm0,%xmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_252





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_252
.L_small_initial_partial_block_252:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)











	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_252
.L_small_initial_compute_done_252:
.L_after_reduction_252:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_2_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_253
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_253

.L_16_blocks_overflow_253:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_253:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%ymm31,%ymm0,%ymm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_254





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_254
.L_small_initial_partial_block_254:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_254:

	orq	%r8,%r8
	je	.L_after_reduction_254
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_254:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_3_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_255
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_255

.L_16_blocks_overflow_255:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_255:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_256





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_256
.L_small_initial_partial_block_256:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_256:

	orq	%r8,%r8
	je	.L_after_reduction_256
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_256:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_4_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_257
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_257

.L_16_blocks_overflow_257:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_257:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_258





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_258
.L_small_initial_partial_block_258:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_258:

	orq	%r8,%r8
	je	.L_after_reduction_258
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_258:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_5_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_259
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_259

.L_16_blocks_overflow_259:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_259:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_260





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_260
.L_small_initial_partial_block_260:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_260:

	orq	%r8,%r8
	je	.L_after_reduction_260
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_260:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_6_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_261
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_261

.L_16_blocks_overflow_261:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_261:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_262





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_262
.L_small_initial_partial_block_262:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_262:

	orq	%r8,%r8
	je	.L_after_reduction_262
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_262:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_7_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_263
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_263

.L_16_blocks_overflow_263:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_263:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_264





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_264
.L_small_initial_partial_block_264:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_264:

	orq	%r8,%r8
	je	.L_after_reduction_264
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_264:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_8_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_265
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_265

.L_16_blocks_overflow_265:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_265:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_266





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_266
.L_small_initial_partial_block_266:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_266:

	orq	%r8,%r8
	je	.L_after_reduction_266
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_266:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_9_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_267
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_267

.L_16_blocks_overflow_267:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_267:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_268





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_268
.L_small_initial_partial_block_268:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_268:

	orq	%r8,%r8
	je	.L_after_reduction_268
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_268:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_10_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_269
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_269

.L_16_blocks_overflow_269:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_269:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_270





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_270
.L_small_initial_partial_block_270:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_270:

	orq	%r8,%r8
	je	.L_after_reduction_270
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_270:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_11_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_271
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_271

.L_16_blocks_overflow_271:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_271:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_272





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_272
.L_small_initial_partial_block_272:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_272:

	orq	%r8,%r8
	je	.L_after_reduction_272
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_272:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_12_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_273
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_273

.L_16_blocks_overflow_273:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_273:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_274





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_274
.L_small_initial_partial_block_274:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_274:

	orq	%r8,%r8
	je	.L_after_reduction_274
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_274:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_13_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_275
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_275

.L_16_blocks_overflow_275:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_275:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_276





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_276
.L_small_initial_partial_block_276:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_276:

	orq	%r8,%r8
	je	.L_after_reduction_276
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_276:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_14_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_277
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_277

.L_16_blocks_overflow_277:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_277:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_278





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_278
.L_small_initial_partial_block_278:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_278:

	orq	%r8,%r8
	je	.L_after_reduction_278
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_278:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_15_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_279
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_279

.L_16_blocks_overflow_279:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_279:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_280





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_280
.L_small_initial_partial_block_280:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_280:

	orq	%r8,%r8
	je	.L_after_reduction_280
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_280:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_16_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_281
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_281

.L_16_blocks_overflow_281:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_281:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_282:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_282:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_282:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_0_250:
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_250:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_172

.L_message_below_32_blocks_172:


	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_283
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)
.L_skip_hkeys_precomputation_283:
	movq	$1,%r14
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_284

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_284
	jb	.L_last_num_blocks_is_7_1_284


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_284
	jb	.L_last_num_blocks_is_11_9_284


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_284
	ja	.L_last_num_blocks_is_16_284
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_284
	jmp	.L_last_num_blocks_is_13_284

.L_last_num_blocks_is_11_9_284:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_284
	ja	.L_last_num_blocks_is_11_284
	jmp	.L_last_num_blocks_is_9_284

.L_last_num_blocks_is_7_1_284:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_284
	jb	.L_last_num_blocks_is_3_1_284

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_284
	je	.L_last_num_blocks_is_6_284
	jmp	.L_last_num_blocks_is_5_284

.L_last_num_blocks_is_3_1_284:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_284
	je	.L_last_num_blocks_is_2_284
.L_last_num_blocks_is_1_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_285
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_285

.L_16_blocks_overflow_285:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_285:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_286





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_286
.L_small_initial_partial_block_286:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_286
.L_small_initial_compute_done_286:
.L_after_reduction_286:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_2_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_287
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_287

.L_16_blocks_overflow_287:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_287:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_288





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_288
.L_small_initial_partial_block_288:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_288:

	orq	%r8,%r8
	je	.L_after_reduction_288
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_288:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_3_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_289
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_289

.L_16_blocks_overflow_289:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_289:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_290





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_290
.L_small_initial_partial_block_290:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_290:

	orq	%r8,%r8
	je	.L_after_reduction_290
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_290:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_4_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_291
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_291

.L_16_blocks_overflow_291:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_291:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_292





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_292
.L_small_initial_partial_block_292:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_292:

	orq	%r8,%r8
	je	.L_after_reduction_292
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_292:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_5_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_293
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_293

.L_16_blocks_overflow_293:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_293:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_294





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_294
.L_small_initial_partial_block_294:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_294:

	orq	%r8,%r8
	je	.L_after_reduction_294
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_294:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_6_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_295
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_295

.L_16_blocks_overflow_295:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_295:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_296





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_296
.L_small_initial_partial_block_296:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_296:

	orq	%r8,%r8
	je	.L_after_reduction_296
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_296:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_7_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_297
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_297

.L_16_blocks_overflow_297:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_297:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_298





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_298
.L_small_initial_partial_block_298:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_298:

	orq	%r8,%r8
	je	.L_after_reduction_298
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_298:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_8_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_299
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_299

.L_16_blocks_overflow_299:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_299:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_300





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_300
.L_small_initial_partial_block_300:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_300:

	orq	%r8,%r8
	je	.L_after_reduction_300
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_300:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_9_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_301
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_301

.L_16_blocks_overflow_301:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_301:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_302





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_302
.L_small_initial_partial_block_302:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_302:

	orq	%r8,%r8
	je	.L_after_reduction_302
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_302:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_10_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_303
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_303

.L_16_blocks_overflow_303:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_303:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_304





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_304
.L_small_initial_partial_block_304:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_304:

	orq	%r8,%r8
	je	.L_after_reduction_304
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_304:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_11_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_305
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_305

.L_16_blocks_overflow_305:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_305:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_306





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_306
.L_small_initial_partial_block_306:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_306:

	orq	%r8,%r8
	je	.L_after_reduction_306
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_306:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_12_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_307
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_307

.L_16_blocks_overflow_307:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_307:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_308





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_308
.L_small_initial_partial_block_308:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_308:

	orq	%r8,%r8
	je	.L_after_reduction_308
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_308:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_13_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_309
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_309

.L_16_blocks_overflow_309:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_309:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_310





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_310
.L_small_initial_partial_block_310:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_310:

	orq	%r8,%r8
	je	.L_after_reduction_310
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_310:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_14_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_311
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_311

.L_16_blocks_overflow_311:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_311:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_312





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_312
.L_small_initial_partial_block_312:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_312:

	orq	%r8,%r8
	je	.L_after_reduction_312
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_312:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_15_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_313
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_313

.L_16_blocks_overflow_313:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_313:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_314





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_314
.L_small_initial_partial_block_314:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_314:

	orq	%r8,%r8
	je	.L_after_reduction_314
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_314:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_16_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_315
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_315

.L_16_blocks_overflow_315:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_315:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_316:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_316:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_316:
	jmp	.L_last_blocks_done_284
.L_last_num_blocks_is_0_284:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_284:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_172

.L_message_below_equal_16_blocks_172:


	movl	%r8d,%r12d
	addl	$15,%r12d
	shrl	$4,%r12d
	cmpq	$8,%r12
	je	.L_small_initial_num_blocks_is_8_317
	jl	.L_small_initial_num_blocks_is_7_1_317


	cmpq	$12,%r12
	je	.L_small_initial_num_blocks_is_12_317
	jl	.L_small_initial_num_blocks_is_11_9_317


	cmpq	$16,%r12
	je	.L_small_initial_num_blocks_is_16_317
	cmpq	$15,%r12
	je	.L_small_initial_num_blocks_is_15_317
	cmpq	$14,%r12
	je	.L_small_initial_num_blocks_is_14_317
	jmp	.L_small_initial_num_blocks_is_13_317

.L_small_initial_num_blocks_is_11_9_317:

	cmpq	$11,%r12
	je	.L_small_initial_num_blocks_is_11_317
	cmpq	$10,%r12
	je	.L_small_initial_num_blocks_is_10_317
	jmp	.L_small_initial_num_blocks_is_9_317

.L_small_initial_num_blocks_is_7_1_317:
	cmpq	$4,%r12
	je	.L_small_initial_num_blocks_is_4_317
	jl	.L_small_initial_num_blocks_is_3_1_317

	cmpq	$7,%r12
	je	.L_small_initial_num_blocks_is_7_317
	cmpq	$6,%r12
	je	.L_small_initial_num_blocks_is_6_317
	jmp	.L_small_initial_num_blocks_is_5_317

.L_small_initial_num_blocks_is_3_1_317:

	cmpq	$3,%r12
	je	.L_small_initial_num_blocks_is_3_317
	cmpq	$2,%r12
	je	.L_small_initial_num_blocks_is_2_317





.L_small_initial_num_blocks_is_1_317:
	vmovdqa64	SHUF_MASK(%rip),%xmm29
	vpaddd	ONE(%rip),%xmm2,%xmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm0,%xmm2
	vpshufb	%xmm29,%xmm0,%xmm0
	vmovdqu8	0(%rcx,%r11,1),%xmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%xmm15,%xmm0,%xmm0
	vpxorq	%xmm6,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm6
	vextracti32x4	$0,%zmm6,%xmm13


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_318





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_318
.L_small_initial_partial_block_318:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)











	vpxorq	%xmm13,%xmm14,%xmm14

	jmp	.L_after_reduction_318
.L_small_initial_compute_done_318:
.L_after_reduction_318:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_2_317:
	vmovdqa64	SHUF_MASK(%rip),%ymm29
	vshufi64x2	$0,%ymm2,%ymm2,%ymm0
	vpaddd	ddq_add_1234(%rip),%ymm0,%ymm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm0,%xmm2
	vpshufb	%ymm29,%ymm0,%ymm0
	vmovdqu8	0(%rcx,%r11,1),%ymm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%ymm15,%ymm0,%ymm0
	vpxorq	%ymm6,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm6
	vextracti32x4	$1,%zmm6,%xmm13
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_319





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_319
.L_small_initial_partial_block_319:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_319:

	orq	%r8,%r8
	je	.L_after_reduction_319
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_319:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_3_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vextracti32x4	$2,%zmm6,%xmm13
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_320





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_320
.L_small_initial_partial_block_320:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_320:

	orq	%r8,%r8
	je	.L_after_reduction_320
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_320:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_4_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vextracti32x4	$3,%zmm6,%xmm13
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_321





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_321
.L_small_initial_partial_block_321:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_321:

	orq	%r8,%r8
	je	.L_after_reduction_321
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_321:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_5_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%xmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%xmm15,%xmm3,%xmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%xmm7,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%xmm29,%xmm3,%xmm7
	vextracti32x4	$0,%zmm7,%xmm13
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_322





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_322
.L_small_initial_partial_block_322:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_322:

	orq	%r8,%r8
	je	.L_after_reduction_322
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_322:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_6_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%ymm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%ymm15,%ymm3,%ymm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%ymm7,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%ymm29,%ymm3,%ymm7
	vextracti32x4	$1,%zmm7,%xmm13
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_323





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_323
.L_small_initial_partial_block_323:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_323:

	orq	%r8,%r8
	je	.L_after_reduction_323
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_323:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_7_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vextracti32x4	$2,%zmm7,%xmm13
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_324





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_324
.L_small_initial_partial_block_324:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_324:

	orq	%r8,%r8
	je	.L_after_reduction_324
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_324:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_8_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vextracti32x4	$3,%zmm7,%xmm13
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_325





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_325
.L_small_initial_partial_block_325:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_325:

	orq	%r8,%r8
	je	.L_after_reduction_325
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_325:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_9_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%xmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%xmm15,%xmm4,%xmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%xmm10,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%xmm29,%xmm4,%xmm10
	vextracti32x4	$0,%zmm10,%xmm13
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_326





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_326
.L_small_initial_partial_block_326:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_326:

	orq	%r8,%r8
	je	.L_after_reduction_326
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_326:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_10_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%ymm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%ymm15,%ymm4,%ymm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%ymm10,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%ymm29,%ymm4,%ymm10
	vextracti32x4	$1,%zmm10,%xmm13
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_327





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_327
.L_small_initial_partial_block_327:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_327:

	orq	%r8,%r8
	je	.L_after_reduction_327
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_327:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_11_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vextracti32x4	$2,%zmm10,%xmm13
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_328





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_328
.L_small_initial_partial_block_328:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_328:

	orq	%r8,%r8
	je	.L_after_reduction_328
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_328:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_12_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vextracti32x4	$3,%zmm10,%xmm13
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_329





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_329
.L_small_initial_partial_block_329:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_329:

	orq	%r8,%r8
	je	.L_after_reduction_329
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_329:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_13_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%xmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%xmm15,%xmm5,%xmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%xmm11,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%xmm29,%xmm5,%xmm11
	vextracti32x4	$0,%zmm11,%xmm13
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_330





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_330
.L_small_initial_partial_block_330:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_330:

	orq	%r8,%r8
	je	.L_after_reduction_330
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_330:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_14_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%ymm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%ymm15,%ymm5,%ymm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%ymm11,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%ymm29,%ymm5,%ymm11
	vextracti32x4	$1,%zmm11,%xmm13
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_331





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_331
.L_small_initial_partial_block_331:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_331:

	orq	%r8,%r8
	je	.L_after_reduction_331
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_331:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_15_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%zmm29,%zmm5,%zmm11
	vextracti32x4	$2,%zmm11,%xmm13
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_332





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_332
.L_small_initial_partial_block_332:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_332:

	orq	%r8,%r8
	je	.L_after_reduction_332
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_332:
	jmp	.L_small_initial_blocks_encrypted_317
.L_small_initial_num_blocks_is_16_317:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%zmm29,%zmm5,%zmm11
	vextracti32x4	$3,%zmm11,%xmm13
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_333:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_333:
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_333:
.L_small_initial_blocks_encrypted_317:
.L_ghash_done_172:
	vmovdqu64	%xmm2,0(%rsi)
	vmovdqu64	%xmm14,64(%rsi)
.L_enc_dec_done_172:
	jmp	.Lexit_gcm_encrypt
.align	32
.Laes_gcm_encrypt_256_avx512:
	orq	%r8,%r8
	je	.L_enc_dec_done_334
	xorq	%r14,%r14
	vmovdqu64	64(%rsi),%xmm14

	movq	(%rdx),%r11
	orq	%r11,%r11
	je	.L_partial_block_done_335
	movl	$16,%r10d
	leaq	byte_len_to_mask_table(%rip),%r12
	cmpq	%r10,%r8
	cmovcq	%r8,%r10
	kmovw	(%r12,%r10,2),%k1
	vmovdqu8	(%rcx),%xmm0{%k1}{z}

	vmovdqu64	16(%rsi),%xmm3
	vmovdqu64	336(%rsi),%xmm4



	leaq	SHIFT_MASK(%rip),%r12
	addq	%r11,%r12
	vmovdqu64	(%r12),%xmm5
	vpshufb	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm0,%xmm3,%xmm3


	leaq	(%r8,%r11,1),%r13
	subq	$16,%r13
	jge	.L_no_extra_mask_335
	subq	%r13,%r12
.L_no_extra_mask_335:



	vmovdqu64	16(%r12),%xmm0
	vpand	%xmm0,%xmm3,%xmm3
	vpshufb	SHUF_MASK(%rip),%xmm3,%xmm3
	vpshufb	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm14,%xmm14
	cmpq	$0,%r13
	jl	.L_partial_incomplete_335

	vpclmulqdq	$0x11,%xmm4,%xmm14,%xmm7
	vpclmulqdq	$0x00,%xmm4,%xmm14,%xmm10
	vpclmulqdq	$0x01,%xmm4,%xmm14,%xmm11
	vpclmulqdq	$0x10,%xmm4,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm14,%xmm14

	vpsrldq	$8,%xmm14,%xmm11
	vpslldq	$8,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm7,%xmm7
	vpxorq	%xmm10,%xmm14,%xmm14



	vmovdqu64	POLY2(%rip),%xmm11

	vpclmulqdq	$0x01,%xmm14,%xmm11,%xmm10
	vpslldq	$8,%xmm10,%xmm10
	vpxorq	%xmm10,%xmm14,%xmm14



	vpclmulqdq	$0x00,%xmm14,%xmm11,%xmm10
	vpsrldq	$4,%xmm10,%xmm10
	vpclmulqdq	$0x10,%xmm14,%xmm11,%xmm14
	vpslldq	$4,%xmm14,%xmm14

	vpternlogq	$0x96,%xmm10,%xmm7,%xmm14

	movq	$0,(%rdx)

	movq	%r11,%r12
	movq	$16,%r11
	subq	%r12,%r11
	jmp	.L_enc_dec_done_335

.L_partial_incomplete_335:
	addq	%r8,(%rdx)
	movq	%r8,%r11

.L_enc_dec_done_335:


	leaq	byte_len_to_mask_table(%rip),%r12
	kmovw	(%r12,%r11,2),%k1
	vmovdqu64	%xmm14,64(%rsi)

	vpshufb	SHUF_MASK(%rip),%xmm3,%xmm3
	vpshufb	%xmm5,%xmm3,%xmm3
	movq	%r9,%r12
	vmovdqu8	%xmm3,(%r12){%k1}
.L_partial_block_done_335:
	vmovdqu64	0(%rsi),%xmm2
	subq	%r11,%r8
	je	.L_enc_dec_done_334
	cmpq	$256,%r8
	jbe	.L_message_below_equal_16_blocks_334

	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vmovdqa64	ddq_addbe_4444(%rip),%zmm27
	vmovdqa64	ddq_addbe_1234(%rip),%zmm28






	vmovd	%xmm2,%r15d
	andl	$255,%r15d

	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpshufb	%zmm29,%zmm2,%zmm2



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_336
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_336
.L_next_16_overflow_336:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_336:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	0(%rcx,%r11,1),%zmm0
	vmovdqu8	64(%rcx,%r11,1),%zmm3
	vmovdqu8	128(%rcx,%r11,1),%zmm4
	vmovdqu8	192(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	176(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	192(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	208(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	224(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,0(%r10,%r11,1)
	vmovdqu8	%zmm10,64(%r10,%r11,1)
	vmovdqu8	%zmm11,128(%r10,%r11,1)
	vmovdqu8	%zmm12,192(%r10,%r11,1)

	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
	vmovdqa64	%zmm7,768(%rsp)
	vmovdqa64	%zmm10,832(%rsp)
	vmovdqa64	%zmm11,896(%rsp)
	vmovdqa64	%zmm12,960(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_337

	vmovdqu64	288(%rsi),%zmm0
	vmovdqu64	%zmm0,704(%rsp)

	vmovdqu64	224(%rsi),%zmm3
	vmovdqu64	%zmm3,640(%rsp)


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	160(%rsi),%zmm4
	vmovdqu64	%zmm4,576(%rsp)

	vmovdqu64	96(%rsi),%zmm5
	vmovdqu64	%zmm5,512(%rsp)
.L_skip_hkeys_precomputation_337:
	cmpq	$512,%r8
	jb	.L_message_below_32_blocks_334



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_338
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_338
.L_next_16_overflow_338:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_338:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	256(%rcx,%r11,1),%zmm0
	vmovdqu8	320(%rcx,%r11,1),%zmm3
	vmovdqu8	384(%rcx,%r11,1),%zmm4
	vmovdqu8	448(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	176(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	192(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	208(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	224(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,256(%r10,%r11,1)
	vmovdqu8	%zmm10,320(%r10,%r11,1)
	vmovdqu8	%zmm11,384(%r10,%r11,1)
	vmovdqu8	%zmm12,448(%r10,%r11,1)

	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
	vmovdqa64	%zmm7,1024(%rsp)
	vmovdqa64	%zmm10,1088(%rsp)
	vmovdqa64	%zmm11,1152(%rsp)
	vmovdqa64	%zmm12,1216(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_339
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,192(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,128(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,64(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,0(%rsp)
.L_skip_hkeys_precomputation_339:
	movq	$1,%r14
	addq	$512,%r11
	subq	$512,%r8

	cmpq	$768,%r8
	jb	.L_no_more_big_nblocks_334
.L_encrypt_big_nblocks_334:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_340
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_340
.L_16_blocks_overflow_340:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_340:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_341
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_341
.L_16_blocks_overflow_341:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_341:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_342
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_342
.L_16_blocks_overflow_342:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_342:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	512(%rcx,%r11,1),%zmm17
	vmovdqu8	576(%rcx,%r11,1),%zmm19
	vmovdqu8	640(%rcx,%r11,1),%zmm20
	vmovdqu8	704(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30


	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10

	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpternlogq	$0x96,%zmm15,%zmm12,%zmm6
	vpxorq	%zmm24,%zmm6,%zmm6
	vpternlogq	$0x96,%zmm10,%zmm13,%zmm7
	vpxorq	%zmm25,%zmm7,%zmm7
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vextracti64x4	$1,%zmm6,%ymm12
	vpxorq	%ymm12,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm12
	vpxorq	%xmm12,%xmm6,%xmm6
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm6
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,512(%r10,%r11,1)
	vmovdqu8	%zmm3,576(%r10,%r11,1)
	vmovdqu8	%zmm4,640(%r10,%r11,1)
	vmovdqu8	%zmm5,704(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1024(%rsp)
	vmovdqa64	%zmm3,1088(%rsp)
	vmovdqa64	%zmm4,1152(%rsp)
	vmovdqa64	%zmm5,1216(%rsp)
	vmovdqa64	%zmm6,%zmm14

	addq	$768,%r11
	subq	$768,%r8
	cmpq	$768,%r8
	jae	.L_encrypt_big_nblocks_334

.L_no_more_big_nblocks_334:

	cmpq	$512,%r8
	jae	.L_encrypt_32_blocks_334

	cmpq	$256,%r8
	jae	.L_encrypt_16_blocks_334
.L_encrypt_0_blocks_ghash_32_334:
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$256,%ebx
	subl	%r10d,%ebx
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	addl	$256,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_343

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_343
	jb	.L_last_num_blocks_is_7_1_343


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_343
	jb	.L_last_num_blocks_is_11_9_343


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_343
	ja	.L_last_num_blocks_is_16_343
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_343
	jmp	.L_last_num_blocks_is_13_343

.L_last_num_blocks_is_11_9_343:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_343
	ja	.L_last_num_blocks_is_11_343
	jmp	.L_last_num_blocks_is_9_343

.L_last_num_blocks_is_7_1_343:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_343
	jb	.L_last_num_blocks_is_3_1_343

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_343
	je	.L_last_num_blocks_is_6_343
	jmp	.L_last_num_blocks_is_5_343

.L_last_num_blocks_is_3_1_343:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_343
	je	.L_last_num_blocks_is_2_343
.L_last_num_blocks_is_1_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_344
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_344

.L_16_blocks_overflow_344:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_344:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_345





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_345
.L_small_initial_partial_block_345:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_345
.L_small_initial_compute_done_345:
.L_after_reduction_345:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_2_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_346
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_346

.L_16_blocks_overflow_346:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_346:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_347





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_347
.L_small_initial_partial_block_347:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_347:

	orq	%r8,%r8
	je	.L_after_reduction_347
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_347:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_3_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_348
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_348

.L_16_blocks_overflow_348:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_348:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_349





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_349
.L_small_initial_partial_block_349:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_349:

	orq	%r8,%r8
	je	.L_after_reduction_349
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_349:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_4_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_350
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_350

.L_16_blocks_overflow_350:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_350:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_351





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_351
.L_small_initial_partial_block_351:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_351:

	orq	%r8,%r8
	je	.L_after_reduction_351
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_351:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_5_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_352
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_352

.L_16_blocks_overflow_352:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_352:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_353





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_353
.L_small_initial_partial_block_353:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_353:

	orq	%r8,%r8
	je	.L_after_reduction_353
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_353:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_6_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_354
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_354

.L_16_blocks_overflow_354:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_354:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_355





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_355
.L_small_initial_partial_block_355:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_355:

	orq	%r8,%r8
	je	.L_after_reduction_355
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_355:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_7_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_356
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_356

.L_16_blocks_overflow_356:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_356:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_357





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_357
.L_small_initial_partial_block_357:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_357:

	orq	%r8,%r8
	je	.L_after_reduction_357
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_357:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_8_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_358
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_358

.L_16_blocks_overflow_358:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_358:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_359





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_359
.L_small_initial_partial_block_359:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_359:

	orq	%r8,%r8
	je	.L_after_reduction_359
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_359:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_9_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_360
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_360

.L_16_blocks_overflow_360:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_360:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_361





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_361
.L_small_initial_partial_block_361:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_361:

	orq	%r8,%r8
	je	.L_after_reduction_361
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_361:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_10_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_362
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_362

.L_16_blocks_overflow_362:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_362:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_363





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_363
.L_small_initial_partial_block_363:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_363:

	orq	%r8,%r8
	je	.L_after_reduction_363
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_363:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_11_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_364
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_364

.L_16_blocks_overflow_364:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_364:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_365





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_365
.L_small_initial_partial_block_365:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_365:

	orq	%r8,%r8
	je	.L_after_reduction_365
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_365:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_12_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_366
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_366

.L_16_blocks_overflow_366:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_366:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_367





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_367
.L_small_initial_partial_block_367:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_367:

	orq	%r8,%r8
	je	.L_after_reduction_367
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_367:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_13_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_368
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_368

.L_16_blocks_overflow_368:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_368:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_369





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_369
.L_small_initial_partial_block_369:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_369:

	orq	%r8,%r8
	je	.L_after_reduction_369
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_369:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_14_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_370
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_370

.L_16_blocks_overflow_370:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_370:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_371





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_371
.L_small_initial_partial_block_371:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_371:

	orq	%r8,%r8
	je	.L_after_reduction_371
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_371:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_15_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_372
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_372

.L_16_blocks_overflow_372:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_372:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_373





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_373
.L_small_initial_partial_block_373:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_373:

	orq	%r8,%r8
	je	.L_after_reduction_373
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_373:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_16_343:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_374
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_374

.L_16_blocks_overflow_374:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_374:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_375:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_375:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_375:
	jmp	.L_last_blocks_done_343
.L_last_num_blocks_is_0_343:
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_343:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_334
.L_encrypt_32_blocks_334:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_376
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_376
.L_16_blocks_overflow_376:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_376:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_377
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_377
.L_16_blocks_overflow_377:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_377:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

	subq	$512,%r8
	addq	$512,%r11
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_378

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_378
	jb	.L_last_num_blocks_is_7_1_378


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_378
	jb	.L_last_num_blocks_is_11_9_378


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_378
	ja	.L_last_num_blocks_is_16_378
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_378
	jmp	.L_last_num_blocks_is_13_378

.L_last_num_blocks_is_11_9_378:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_378
	ja	.L_last_num_blocks_is_11_378
	jmp	.L_last_num_blocks_is_9_378

.L_last_num_blocks_is_7_1_378:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_378
	jb	.L_last_num_blocks_is_3_1_378

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_378
	je	.L_last_num_blocks_is_6_378
	jmp	.L_last_num_blocks_is_5_378

.L_last_num_blocks_is_3_1_378:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_378
	je	.L_last_num_blocks_is_2_378
.L_last_num_blocks_is_1_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_379
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_379

.L_16_blocks_overflow_379:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_379:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_380





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_380
.L_small_initial_partial_block_380:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_380
.L_small_initial_compute_done_380:
.L_after_reduction_380:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_2_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_381
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_381

.L_16_blocks_overflow_381:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_381:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_382





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_382
.L_small_initial_partial_block_382:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_382:

	orq	%r8,%r8
	je	.L_after_reduction_382
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_382:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_3_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_383
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_383

.L_16_blocks_overflow_383:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_383:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_384





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_384
.L_small_initial_partial_block_384:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_384:

	orq	%r8,%r8
	je	.L_after_reduction_384
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_384:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_4_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_385
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_385

.L_16_blocks_overflow_385:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_385:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_386





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_386
.L_small_initial_partial_block_386:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_386:

	orq	%r8,%r8
	je	.L_after_reduction_386
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_386:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_5_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_387
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_387

.L_16_blocks_overflow_387:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_387:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_388





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_388
.L_small_initial_partial_block_388:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_388:

	orq	%r8,%r8
	je	.L_after_reduction_388
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_388:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_6_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_389
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_389

.L_16_blocks_overflow_389:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_389:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_390





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_390
.L_small_initial_partial_block_390:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_390:

	orq	%r8,%r8
	je	.L_after_reduction_390
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_390:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_7_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_391
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_391

.L_16_blocks_overflow_391:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_391:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_392





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_392
.L_small_initial_partial_block_392:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_392:

	orq	%r8,%r8
	je	.L_after_reduction_392
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_392:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_8_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_393
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_393

.L_16_blocks_overflow_393:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_393:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_394





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_394
.L_small_initial_partial_block_394:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_394:

	orq	%r8,%r8
	je	.L_after_reduction_394
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_394:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_9_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_395
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_395

.L_16_blocks_overflow_395:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_395:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_396





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_396
.L_small_initial_partial_block_396:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_396:

	orq	%r8,%r8
	je	.L_after_reduction_396
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_396:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_10_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_397
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_397

.L_16_blocks_overflow_397:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_397:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_398





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_398
.L_small_initial_partial_block_398:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_398:

	orq	%r8,%r8
	je	.L_after_reduction_398
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_398:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_11_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_399
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_399

.L_16_blocks_overflow_399:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_399:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_400





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_400
.L_small_initial_partial_block_400:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_400:

	orq	%r8,%r8
	je	.L_after_reduction_400
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_400:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_12_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_401
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_401

.L_16_blocks_overflow_401:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_401:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_402





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_402
.L_small_initial_partial_block_402:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_402:

	orq	%r8,%r8
	je	.L_after_reduction_402
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_402:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_13_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_403
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_403

.L_16_blocks_overflow_403:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_403:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_404





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_404
.L_small_initial_partial_block_404:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_404:

	orq	%r8,%r8
	je	.L_after_reduction_404
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_404:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_14_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_405
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_405

.L_16_blocks_overflow_405:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_405:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_406





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_406
.L_small_initial_partial_block_406:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_406:

	orq	%r8,%r8
	je	.L_after_reduction_406
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_406:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_15_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_407
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_407

.L_16_blocks_overflow_407:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_407:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_408





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_408
.L_small_initial_partial_block_408:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_408:

	orq	%r8,%r8
	je	.L_after_reduction_408
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_408:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_16_378:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_409
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_409

.L_16_blocks_overflow_409:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_409:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_410:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_410:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_410:
	jmp	.L_last_blocks_done_378
.L_last_num_blocks_is_0_378:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_378:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_334
.L_encrypt_16_blocks_334:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_411
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_411
.L_16_blocks_overflow_411:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_411:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	256(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	320(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	384(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	448(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_412

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_412
	jb	.L_last_num_blocks_is_7_1_412


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_412
	jb	.L_last_num_blocks_is_11_9_412


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_412
	ja	.L_last_num_blocks_is_16_412
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_412
	jmp	.L_last_num_blocks_is_13_412

.L_last_num_blocks_is_11_9_412:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_412
	ja	.L_last_num_blocks_is_11_412
	jmp	.L_last_num_blocks_is_9_412

.L_last_num_blocks_is_7_1_412:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_412
	jb	.L_last_num_blocks_is_3_1_412

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_412
	je	.L_last_num_blocks_is_6_412
	jmp	.L_last_num_blocks_is_5_412

.L_last_num_blocks_is_3_1_412:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_412
	je	.L_last_num_blocks_is_2_412
.L_last_num_blocks_is_1_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_413
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_413

.L_16_blocks_overflow_413:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_413:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%xmm31,%xmm0,%xmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_414





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_414
.L_small_initial_partial_block_414:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)











	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_414
.L_small_initial_compute_done_414:
.L_after_reduction_414:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_2_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_415
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_415

.L_16_blocks_overflow_415:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_415:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%ymm31,%ymm0,%ymm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_416





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_416
.L_small_initial_partial_block_416:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_416:

	orq	%r8,%r8
	je	.L_after_reduction_416
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_416:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_3_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_417
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_417

.L_16_blocks_overflow_417:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_417:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_418





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_418
.L_small_initial_partial_block_418:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_418:

	orq	%r8,%r8
	je	.L_after_reduction_418
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_418:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_4_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_419
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_419

.L_16_blocks_overflow_419:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_419:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_420





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_420
.L_small_initial_partial_block_420:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_420:

	orq	%r8,%r8
	je	.L_after_reduction_420
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_420:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_5_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_421
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_421

.L_16_blocks_overflow_421:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_421:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_422





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_422
.L_small_initial_partial_block_422:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_422:

	orq	%r8,%r8
	je	.L_after_reduction_422
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_422:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_6_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_423
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_423

.L_16_blocks_overflow_423:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_423:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_424





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_424
.L_small_initial_partial_block_424:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_424:

	orq	%r8,%r8
	je	.L_after_reduction_424
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_424:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_7_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_425
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_425

.L_16_blocks_overflow_425:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_425:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_426





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_426
.L_small_initial_partial_block_426:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_426:

	orq	%r8,%r8
	je	.L_after_reduction_426
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_426:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_8_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_427
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_427

.L_16_blocks_overflow_427:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_427:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_428





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_428
.L_small_initial_partial_block_428:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_428:

	orq	%r8,%r8
	je	.L_after_reduction_428
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_428:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_9_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_429
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_429

.L_16_blocks_overflow_429:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_429:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_430





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_430
.L_small_initial_partial_block_430:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_430:

	orq	%r8,%r8
	je	.L_after_reduction_430
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_430:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_10_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_431
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_431

.L_16_blocks_overflow_431:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_431:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_432





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_432
.L_small_initial_partial_block_432:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_432:

	orq	%r8,%r8
	je	.L_after_reduction_432
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_432:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_11_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_433
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_433

.L_16_blocks_overflow_433:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_433:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_434





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_434
.L_small_initial_partial_block_434:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_434:

	orq	%r8,%r8
	je	.L_after_reduction_434
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_434:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_12_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_435
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_435

.L_16_blocks_overflow_435:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_435:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_436





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_436
.L_small_initial_partial_block_436:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_436:

	orq	%r8,%r8
	je	.L_after_reduction_436
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_436:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_13_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_437
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_437

.L_16_blocks_overflow_437:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_437:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_438





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_438
.L_small_initial_partial_block_438:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_438:

	orq	%r8,%r8
	je	.L_after_reduction_438
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_438:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_14_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_439
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_439

.L_16_blocks_overflow_439:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_439:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_440





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_440
.L_small_initial_partial_block_440:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_440:

	orq	%r8,%r8
	je	.L_after_reduction_440
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_440:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_15_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_441
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_441

.L_16_blocks_overflow_441:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_441:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_442





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_442
.L_small_initial_partial_block_442:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_442:

	orq	%r8,%r8
	je	.L_after_reduction_442
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_442:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_16_412:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_443
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_443

.L_16_blocks_overflow_443:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_443:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_444:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_444:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_444:
	jmp	.L_last_blocks_done_412
.L_last_num_blocks_is_0_412:
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_412:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_334

.L_message_below_32_blocks_334:


	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_445
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)
.L_skip_hkeys_precomputation_445:
	movq	$1,%r14
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_446

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_446
	jb	.L_last_num_blocks_is_7_1_446


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_446
	jb	.L_last_num_blocks_is_11_9_446


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_446
	ja	.L_last_num_blocks_is_16_446
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_446
	jmp	.L_last_num_blocks_is_13_446

.L_last_num_blocks_is_11_9_446:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_446
	ja	.L_last_num_blocks_is_11_446
	jmp	.L_last_num_blocks_is_9_446

.L_last_num_blocks_is_7_1_446:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_446
	jb	.L_last_num_blocks_is_3_1_446

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_446
	je	.L_last_num_blocks_is_6_446
	jmp	.L_last_num_blocks_is_5_446

.L_last_num_blocks_is_3_1_446:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_446
	je	.L_last_num_blocks_is_2_446
.L_last_num_blocks_is_1_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_447
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_447

.L_16_blocks_overflow_447:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_447:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_448





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_448
.L_small_initial_partial_block_448:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_448
.L_small_initial_compute_done_448:
.L_after_reduction_448:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_2_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_449
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_449

.L_16_blocks_overflow_449:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_449:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_450





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_450
.L_small_initial_partial_block_450:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_450:

	orq	%r8,%r8
	je	.L_after_reduction_450
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_450:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_3_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_451
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_451

.L_16_blocks_overflow_451:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_451:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_452





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_452
.L_small_initial_partial_block_452:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_452:

	orq	%r8,%r8
	je	.L_after_reduction_452
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_452:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_4_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_453
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_453

.L_16_blocks_overflow_453:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_453:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_454





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_454
.L_small_initial_partial_block_454:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_454:

	orq	%r8,%r8
	je	.L_after_reduction_454
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_454:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_5_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_455
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_455

.L_16_blocks_overflow_455:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_455:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_456





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_456
.L_small_initial_partial_block_456:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_456:

	orq	%r8,%r8
	je	.L_after_reduction_456
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_456:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_6_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_457
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_457

.L_16_blocks_overflow_457:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_457:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_458





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_458
.L_small_initial_partial_block_458:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_458:

	orq	%r8,%r8
	je	.L_after_reduction_458
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_458:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_7_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_459
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_459

.L_16_blocks_overflow_459:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_459:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_460





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_460
.L_small_initial_partial_block_460:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_460:

	orq	%r8,%r8
	je	.L_after_reduction_460
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_460:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_8_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_461
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_461

.L_16_blocks_overflow_461:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_461:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_462





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_462
.L_small_initial_partial_block_462:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_462:

	orq	%r8,%r8
	je	.L_after_reduction_462
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_462:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_9_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_463
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_463

.L_16_blocks_overflow_463:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_463:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_464





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_464
.L_small_initial_partial_block_464:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_464:

	orq	%r8,%r8
	je	.L_after_reduction_464
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_464:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_10_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_465
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_465

.L_16_blocks_overflow_465:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_465:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_466





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_466
.L_small_initial_partial_block_466:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_466:

	orq	%r8,%r8
	je	.L_after_reduction_466
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_466:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_11_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_467
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_467

.L_16_blocks_overflow_467:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_467:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_468





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_468
.L_small_initial_partial_block_468:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_468:

	orq	%r8,%r8
	je	.L_after_reduction_468
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_468:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_12_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_469
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_469

.L_16_blocks_overflow_469:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_469:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_470





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_470
.L_small_initial_partial_block_470:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_470:

	orq	%r8,%r8
	je	.L_after_reduction_470
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_470:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_13_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_471
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_471

.L_16_blocks_overflow_471:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_471:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_472





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_472
.L_small_initial_partial_block_472:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_472:

	orq	%r8,%r8
	je	.L_after_reduction_472
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_472:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_14_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_473
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_473

.L_16_blocks_overflow_473:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_473:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_474





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_474
.L_small_initial_partial_block_474:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_474:

	orq	%r8,%r8
	je	.L_after_reduction_474
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_474:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_15_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_475
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_475

.L_16_blocks_overflow_475:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_475:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_476





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_476
.L_small_initial_partial_block_476:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_476:

	orq	%r8,%r8
	je	.L_after_reduction_476
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_476:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_16_446:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_477
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_477

.L_16_blocks_overflow_477:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_477:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_478:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_478:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_478:
	jmp	.L_last_blocks_done_446
.L_last_num_blocks_is_0_446:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_446:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_334

.L_message_below_equal_16_blocks_334:


	movl	%r8d,%r12d
	addl	$15,%r12d
	shrl	$4,%r12d
	cmpq	$8,%r12
	je	.L_small_initial_num_blocks_is_8_479
	jl	.L_small_initial_num_blocks_is_7_1_479


	cmpq	$12,%r12
	je	.L_small_initial_num_blocks_is_12_479
	jl	.L_small_initial_num_blocks_is_11_9_479


	cmpq	$16,%r12
	je	.L_small_initial_num_blocks_is_16_479
	cmpq	$15,%r12
	je	.L_small_initial_num_blocks_is_15_479
	cmpq	$14,%r12
	je	.L_small_initial_num_blocks_is_14_479
	jmp	.L_small_initial_num_blocks_is_13_479

.L_small_initial_num_blocks_is_11_9_479:

	cmpq	$11,%r12
	je	.L_small_initial_num_blocks_is_11_479
	cmpq	$10,%r12
	je	.L_small_initial_num_blocks_is_10_479
	jmp	.L_small_initial_num_blocks_is_9_479

.L_small_initial_num_blocks_is_7_1_479:
	cmpq	$4,%r12
	je	.L_small_initial_num_blocks_is_4_479
	jl	.L_small_initial_num_blocks_is_3_1_479

	cmpq	$7,%r12
	je	.L_small_initial_num_blocks_is_7_479
	cmpq	$6,%r12
	je	.L_small_initial_num_blocks_is_6_479
	jmp	.L_small_initial_num_blocks_is_5_479

.L_small_initial_num_blocks_is_3_1_479:

	cmpq	$3,%r12
	je	.L_small_initial_num_blocks_is_3_479
	cmpq	$2,%r12
	je	.L_small_initial_num_blocks_is_2_479





.L_small_initial_num_blocks_is_1_479:
	vmovdqa64	SHUF_MASK(%rip),%xmm29
	vpaddd	ONE(%rip),%xmm2,%xmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm0,%xmm2
	vpshufb	%xmm29,%xmm0,%xmm0
	vmovdqu8	0(%rcx,%r11,1),%xmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%xmm15,%xmm0,%xmm0
	vpxorq	%xmm6,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm6
	vextracti32x4	$0,%zmm6,%xmm13


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_480





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_480
.L_small_initial_partial_block_480:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)











	vpxorq	%xmm13,%xmm14,%xmm14

	jmp	.L_after_reduction_480
.L_small_initial_compute_done_480:
.L_after_reduction_480:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_2_479:
	vmovdqa64	SHUF_MASK(%rip),%ymm29
	vshufi64x2	$0,%ymm2,%ymm2,%ymm0
	vpaddd	ddq_add_1234(%rip),%ymm0,%ymm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm0,%xmm2
	vpshufb	%ymm29,%ymm0,%ymm0
	vmovdqu8	0(%rcx,%r11,1),%ymm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%ymm15,%ymm0,%ymm0
	vpxorq	%ymm6,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm6
	vextracti32x4	$1,%zmm6,%xmm13
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_481





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_481
.L_small_initial_partial_block_481:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_481:

	orq	%r8,%r8
	je	.L_after_reduction_481
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_481:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_3_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vextracti32x4	$2,%zmm6,%xmm13
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_482





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_482
.L_small_initial_partial_block_482:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_482:

	orq	%r8,%r8
	je	.L_after_reduction_482
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_482:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_4_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vextracti32x4	$3,%zmm6,%xmm13
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_483





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_483
.L_small_initial_partial_block_483:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_483:

	orq	%r8,%r8
	je	.L_after_reduction_483
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_483:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_5_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%xmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%xmm15,%xmm3,%xmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%xmm7,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%xmm29,%xmm3,%xmm7
	vextracti32x4	$0,%zmm7,%xmm13
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_484





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_484
.L_small_initial_partial_block_484:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_484:

	orq	%r8,%r8
	je	.L_after_reduction_484
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_484:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_6_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%ymm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%ymm15,%ymm3,%ymm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%ymm7,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%ymm29,%ymm3,%ymm7
	vextracti32x4	$1,%zmm7,%xmm13
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_485





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_485
.L_small_initial_partial_block_485:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_485:

	orq	%r8,%r8
	je	.L_after_reduction_485
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_485:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_7_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vextracti32x4	$2,%zmm7,%xmm13
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_486





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_486
.L_small_initial_partial_block_486:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_486:

	orq	%r8,%r8
	je	.L_after_reduction_486
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_486:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_8_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vextracti32x4	$3,%zmm7,%xmm13
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_487





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_487
.L_small_initial_partial_block_487:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_487:

	orq	%r8,%r8
	je	.L_after_reduction_487
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_487:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_9_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%xmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%xmm15,%xmm4,%xmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%xmm10,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%xmm29,%xmm4,%xmm10
	vextracti32x4	$0,%zmm10,%xmm13
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_488





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_488
.L_small_initial_partial_block_488:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_488:

	orq	%r8,%r8
	je	.L_after_reduction_488
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_488:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_10_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%ymm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%ymm15,%ymm4,%ymm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%ymm10,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%ymm29,%ymm4,%ymm10
	vextracti32x4	$1,%zmm10,%xmm13
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_489





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_489
.L_small_initial_partial_block_489:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_489:

	orq	%r8,%r8
	je	.L_after_reduction_489
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_489:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_11_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vextracti32x4	$2,%zmm10,%xmm13
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_490





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_490
.L_small_initial_partial_block_490:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_490:

	orq	%r8,%r8
	je	.L_after_reduction_490
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_490:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_12_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vextracti32x4	$3,%zmm10,%xmm13
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_491





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_491
.L_small_initial_partial_block_491:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_491:

	orq	%r8,%r8
	je	.L_after_reduction_491
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_491:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_13_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%xmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%xmm15,%xmm5,%xmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%xmm11,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%xmm29,%xmm5,%xmm11
	vextracti32x4	$0,%zmm11,%xmm13
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_492





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_492
.L_small_initial_partial_block_492:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_492:

	orq	%r8,%r8
	je	.L_after_reduction_492
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_492:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_14_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%ymm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%ymm15,%ymm5,%ymm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%ymm11,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%ymm29,%ymm5,%ymm11
	vextracti32x4	$1,%zmm11,%xmm13
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_493





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_493
.L_small_initial_partial_block_493:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_493:

	orq	%r8,%r8
	je	.L_after_reduction_493
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_493:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_15_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%zmm29,%zmm5,%zmm11
	vextracti32x4	$2,%zmm11,%xmm13
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_494





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_494
.L_small_initial_partial_block_494:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_494:

	orq	%r8,%r8
	je	.L_after_reduction_494
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_494:
	jmp	.L_small_initial_blocks_encrypted_479
.L_small_initial_num_blocks_is_16_479:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%zmm29,%zmm5,%zmm11
	vextracti32x4	$3,%zmm11,%xmm13
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_495:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_495:
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_495:
.L_small_initial_blocks_encrypted_479:
.L_ghash_done_334:
	vmovdqu64	%xmm2,0(%rsi)
	vmovdqu64	%xmm14,64(%rsi)
.L_enc_dec_done_334:
	jmp	.Lexit_gcm_encrypt
.Lexit_gcm_encrypt:
	cmpq	$256,%r8
	jbe	.Lskip_hkeys_cleanup_496
	vpxor	%xmm0,%xmm0,%xmm0
	vmovdqa64	%zmm0,0(%rsp)
	vmovdqa64	%zmm0,64(%rsp)
	vmovdqa64	%zmm0,128(%rsp)
	vmovdqa64	%zmm0,192(%rsp)
	vmovdqa64	%zmm0,256(%rsp)
	vmovdqa64	%zmm0,320(%rsp)
	vmovdqa64	%zmm0,384(%rsp)
	vmovdqa64	%zmm0,448(%rsp)
	vmovdqa64	%zmm0,512(%rsp)
	vmovdqa64	%zmm0,576(%rsp)
	vmovdqa64	%zmm0,640(%rsp)
	vmovdqa64	%zmm0,704(%rsp)
.Lskip_hkeys_cleanup_496:
	vzeroupper
	leaq	(%rbp),%rsp
.cfi_def_cfa_register	%rsp
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
	.byte	0xf3,0xc3
.Lencrypt_seh_end:
.cfi_endproc	
.size	ossl_aes_gcm_encrypt_avx512, .-ossl_aes_gcm_encrypt_avx512
.globl	ossl_aes_gcm_decrypt_avx512
.type	ossl_aes_gcm_decrypt_avx512,@function
.align	32
ossl_aes_gcm_decrypt_avx512:
.cfi_startproc	
.Ldecrypt_seh_begin:
.byte	243,15,30,250
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
.Ldecrypt_seh_push_rbx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
.Ldecrypt_seh_push_rbp:
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.Ldecrypt_seh_push_r12:
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.Ldecrypt_seh_push_r13:
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.Ldecrypt_seh_push_r14:
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Ldecrypt_seh_push_r15:










	leaq	0(%rsp),%rbp
.cfi_def_cfa_register	%rbp
.Ldecrypt_seh_setfp:

.Ldecrypt_seh_prolog_end:
	subq	$1588,%rsp
	andq	$(-64),%rsp


	movl	240(%rdi),%eax
	cmpl	$9,%eax
	je	.Laes_gcm_decrypt_128_avx512
	cmpl	$11,%eax
	je	.Laes_gcm_decrypt_192_avx512
	cmpl	$13,%eax
	je	.Laes_gcm_decrypt_256_avx512
	xorl	%eax,%eax
	jmp	.Lexit_gcm_decrypt
.align	32
.Laes_gcm_decrypt_128_avx512:
	orq	%r8,%r8
	je	.L_enc_dec_done_497
	xorq	%r14,%r14
	vmovdqu64	64(%rsi),%xmm14

	movq	(%rdx),%r11
	orq	%r11,%r11
	je	.L_partial_block_done_498
	movl	$16,%r10d
	leaq	byte_len_to_mask_table(%rip),%r12
	cmpq	%r10,%r8
	cmovcq	%r8,%r10
	kmovw	(%r12,%r10,2),%k1
	vmovdqu8	(%rcx),%xmm0{%k1}{z}

	vmovdqu64	16(%rsi),%xmm3
	vmovdqu64	336(%rsi),%xmm4



	leaq	SHIFT_MASK(%rip),%r12
	addq	%r11,%r12
	vmovdqu64	(%r12),%xmm5
	vpshufb	%xmm5,%xmm3,%xmm3

	vmovdqa64	%xmm0,%xmm6
	vpxorq	%xmm0,%xmm3,%xmm3


	leaq	(%r8,%r11,1),%r13
	subq	$16,%r13
	jge	.L_no_extra_mask_498
	subq	%r13,%r12
.L_no_extra_mask_498:



	vmovdqu64	16(%r12),%xmm0
	vpand	%xmm0,%xmm3,%xmm3
	vpand	%xmm0,%xmm6,%xmm6
	vpshufb	SHUF_MASK(%rip),%xmm6,%xmm6
	vpshufb	%xmm5,%xmm6,%xmm6
	vpxorq	%xmm6,%xmm14,%xmm14
	cmpq	$0,%r13
	jl	.L_partial_incomplete_498

	vpclmulqdq	$0x11,%xmm4,%xmm14,%xmm7
	vpclmulqdq	$0x00,%xmm4,%xmm14,%xmm10
	vpclmulqdq	$0x01,%xmm4,%xmm14,%xmm11
	vpclmulqdq	$0x10,%xmm4,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm14,%xmm14

	vpsrldq	$8,%xmm14,%xmm11
	vpslldq	$8,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm7,%xmm7
	vpxorq	%xmm10,%xmm14,%xmm14



	vmovdqu64	POLY2(%rip),%xmm11

	vpclmulqdq	$0x01,%xmm14,%xmm11,%xmm10
	vpslldq	$8,%xmm10,%xmm10
	vpxorq	%xmm10,%xmm14,%xmm14



	vpclmulqdq	$0x00,%xmm14,%xmm11,%xmm10
	vpsrldq	$4,%xmm10,%xmm10
	vpclmulqdq	$0x10,%xmm14,%xmm11,%xmm14
	vpslldq	$4,%xmm14,%xmm14

	vpternlogq	$0x96,%xmm10,%xmm7,%xmm14

	movq	$0,(%rdx)

	movq	%r11,%r12
	movq	$16,%r11
	subq	%r12,%r11
	jmp	.L_enc_dec_done_498

.L_partial_incomplete_498:
	addq	%r8,(%rdx)
	movq	%r8,%r11

.L_enc_dec_done_498:


	leaq	byte_len_to_mask_table(%rip),%r12
	kmovw	(%r12,%r11,2),%k1
	vmovdqu64	%xmm14,64(%rsi)
	movq	%r9,%r12
	vmovdqu8	%xmm3,(%r12){%k1}
.L_partial_block_done_498:
	vmovdqu64	0(%rsi),%xmm2
	subq	%r11,%r8
	je	.L_enc_dec_done_497
	cmpq	$256,%r8
	jbe	.L_message_below_equal_16_blocks_497

	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vmovdqa64	ddq_addbe_4444(%rip),%zmm27
	vmovdqa64	ddq_addbe_1234(%rip),%zmm28






	vmovd	%xmm2,%r15d
	andl	$255,%r15d

	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpshufb	%zmm29,%zmm2,%zmm2



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_499
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_499
.L_next_16_overflow_499:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_499:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	0(%rcx,%r11,1),%zmm0
	vmovdqu8	64(%rcx,%r11,1),%zmm3
	vmovdqu8	128(%rcx,%r11,1),%zmm4
	vmovdqu8	192(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,0(%r10,%r11,1)
	vmovdqu8	%zmm10,64(%r10,%r11,1)
	vmovdqu8	%zmm11,128(%r10,%r11,1)
	vmovdqu8	%zmm12,192(%r10,%r11,1)

	vpshufb	%zmm29,%zmm0,%zmm7
	vpshufb	%zmm29,%zmm3,%zmm10
	vpshufb	%zmm29,%zmm4,%zmm11
	vpshufb	%zmm29,%zmm5,%zmm12
	vmovdqa64	%zmm7,768(%rsp)
	vmovdqa64	%zmm10,832(%rsp)
	vmovdqa64	%zmm11,896(%rsp)
	vmovdqa64	%zmm12,960(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_500

	vmovdqu64	288(%rsi),%zmm0
	vmovdqu64	%zmm0,704(%rsp)

	vmovdqu64	224(%rsi),%zmm3
	vmovdqu64	%zmm3,640(%rsp)


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	160(%rsi),%zmm4
	vmovdqu64	%zmm4,576(%rsp)

	vmovdqu64	96(%rsi),%zmm5
	vmovdqu64	%zmm5,512(%rsp)
.L_skip_hkeys_precomputation_500:
	cmpq	$512,%r8
	jb	.L_message_below_32_blocks_497



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_501
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_501
.L_next_16_overflow_501:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_501:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	256(%rcx,%r11,1),%zmm0
	vmovdqu8	320(%rcx,%r11,1),%zmm3
	vmovdqu8	384(%rcx,%r11,1),%zmm4
	vmovdqu8	448(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,256(%r10,%r11,1)
	vmovdqu8	%zmm10,320(%r10,%r11,1)
	vmovdqu8	%zmm11,384(%r10,%r11,1)
	vmovdqu8	%zmm12,448(%r10,%r11,1)

	vpshufb	%zmm29,%zmm0,%zmm7
	vpshufb	%zmm29,%zmm3,%zmm10
	vpshufb	%zmm29,%zmm4,%zmm11
	vpshufb	%zmm29,%zmm5,%zmm12
	vmovdqa64	%zmm7,1024(%rsp)
	vmovdqa64	%zmm10,1088(%rsp)
	vmovdqa64	%zmm11,1152(%rsp)
	vmovdqa64	%zmm12,1216(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_502
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,192(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,128(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,64(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,0(%rsp)
.L_skip_hkeys_precomputation_502:
	movq	$1,%r14
	addq	$512,%r11
	subq	$512,%r8

	cmpq	$768,%r8
	jb	.L_no_more_big_nblocks_497
.L_encrypt_big_nblocks_497:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_503
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_503
.L_16_blocks_overflow_503:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_503:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_504
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_504
.L_16_blocks_overflow_504:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_504:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_505
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_505
.L_16_blocks_overflow_505:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_505:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	512(%rcx,%r11,1),%zmm17
	vmovdqu8	576(%rcx,%r11,1),%zmm19
	vmovdqu8	640(%rcx,%r11,1),%zmm20
	vmovdqu8	704(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30


	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10

	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpternlogq	$0x96,%zmm15,%zmm12,%zmm6
	vpxorq	%zmm24,%zmm6,%zmm6
	vpternlogq	$0x96,%zmm10,%zmm13,%zmm7
	vpxorq	%zmm25,%zmm7,%zmm7
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vextracti64x4	$1,%zmm6,%ymm12
	vpxorq	%ymm12,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm12
	vpxorq	%xmm12,%xmm6,%xmm6
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm6
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,512(%r10,%r11,1)
	vmovdqu8	%zmm3,576(%r10,%r11,1)
	vmovdqu8	%zmm4,640(%r10,%r11,1)
	vmovdqu8	%zmm5,704(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1024(%rsp)
	vmovdqa64	%zmm3,1088(%rsp)
	vmovdqa64	%zmm4,1152(%rsp)
	vmovdqa64	%zmm5,1216(%rsp)
	vmovdqa64	%zmm6,%zmm14

	addq	$768,%r11
	subq	$768,%r8
	cmpq	$768,%r8
	jae	.L_encrypt_big_nblocks_497

.L_no_more_big_nblocks_497:

	cmpq	$512,%r8
	jae	.L_encrypt_32_blocks_497

	cmpq	$256,%r8
	jae	.L_encrypt_16_blocks_497
.L_encrypt_0_blocks_ghash_32_497:
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$256,%ebx
	subl	%r10d,%ebx
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	addl	$256,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_506

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_506
	jb	.L_last_num_blocks_is_7_1_506


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_506
	jb	.L_last_num_blocks_is_11_9_506


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_506
	ja	.L_last_num_blocks_is_16_506
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_506
	jmp	.L_last_num_blocks_is_13_506

.L_last_num_blocks_is_11_9_506:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_506
	ja	.L_last_num_blocks_is_11_506
	jmp	.L_last_num_blocks_is_9_506

.L_last_num_blocks_is_7_1_506:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_506
	jb	.L_last_num_blocks_is_3_1_506

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_506
	je	.L_last_num_blocks_is_6_506
	jmp	.L_last_num_blocks_is_5_506

.L_last_num_blocks_is_3_1_506:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_506
	je	.L_last_num_blocks_is_2_506
.L_last_num_blocks_is_1_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_507
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_507

.L_16_blocks_overflow_507:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_507:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_508





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_508
.L_small_initial_partial_block_508:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_508
.L_small_initial_compute_done_508:
.L_after_reduction_508:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_2_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_509
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_509

.L_16_blocks_overflow_509:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_509:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_510





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_510
.L_small_initial_partial_block_510:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_510:

	orq	%r8,%r8
	je	.L_after_reduction_510
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_510:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_3_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_511
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_511

.L_16_blocks_overflow_511:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_511:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_512





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_512
.L_small_initial_partial_block_512:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_512:

	orq	%r8,%r8
	je	.L_after_reduction_512
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_512:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_4_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_513
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_513

.L_16_blocks_overflow_513:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_513:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_514





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_514
.L_small_initial_partial_block_514:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_514:

	orq	%r8,%r8
	je	.L_after_reduction_514
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_514:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_5_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_515
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_515

.L_16_blocks_overflow_515:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_515:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_516





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_516
.L_small_initial_partial_block_516:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_516:

	orq	%r8,%r8
	je	.L_after_reduction_516
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_516:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_6_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_517
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_517

.L_16_blocks_overflow_517:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_517:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_518





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_518
.L_small_initial_partial_block_518:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_518:

	orq	%r8,%r8
	je	.L_after_reduction_518
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_518:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_7_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_519
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_519

.L_16_blocks_overflow_519:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_519:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_520





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_520
.L_small_initial_partial_block_520:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_520:

	orq	%r8,%r8
	je	.L_after_reduction_520
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_520:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_8_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_521
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_521

.L_16_blocks_overflow_521:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_521:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_522





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_522
.L_small_initial_partial_block_522:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_522:

	orq	%r8,%r8
	je	.L_after_reduction_522
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_522:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_9_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_523
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_523

.L_16_blocks_overflow_523:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_523:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_524





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_524
.L_small_initial_partial_block_524:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_524:

	orq	%r8,%r8
	je	.L_after_reduction_524
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_524:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_10_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_525
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_525

.L_16_blocks_overflow_525:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_525:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_526





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_526
.L_small_initial_partial_block_526:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_526:

	orq	%r8,%r8
	je	.L_after_reduction_526
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_526:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_11_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_527
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_527

.L_16_blocks_overflow_527:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_527:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_528





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_528
.L_small_initial_partial_block_528:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_528:

	orq	%r8,%r8
	je	.L_after_reduction_528
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_528:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_12_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_529
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_529

.L_16_blocks_overflow_529:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_529:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_530





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_530
.L_small_initial_partial_block_530:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_530:

	orq	%r8,%r8
	je	.L_after_reduction_530
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_530:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_13_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_531
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_531

.L_16_blocks_overflow_531:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_531:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_532





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_532
.L_small_initial_partial_block_532:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_532:

	orq	%r8,%r8
	je	.L_after_reduction_532
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_532:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_14_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_533
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_533

.L_16_blocks_overflow_533:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_533:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_534





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_534
.L_small_initial_partial_block_534:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_534:

	orq	%r8,%r8
	je	.L_after_reduction_534
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_534:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_15_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_535
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_535

.L_16_blocks_overflow_535:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_535:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_536





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_536
.L_small_initial_partial_block_536:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_536:

	orq	%r8,%r8
	je	.L_after_reduction_536
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_536:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_16_506:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_537
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_537

.L_16_blocks_overflow_537:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_537:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_538:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_538:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_538:
	jmp	.L_last_blocks_done_506
.L_last_num_blocks_is_0_506:
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_506:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_497
.L_encrypt_32_blocks_497:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_539
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_539
.L_16_blocks_overflow_539:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_539:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_540
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_540
.L_16_blocks_overflow_540:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_540:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

	subq	$512,%r8
	addq	$512,%r11
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_541

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_541
	jb	.L_last_num_blocks_is_7_1_541


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_541
	jb	.L_last_num_blocks_is_11_9_541


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_541
	ja	.L_last_num_blocks_is_16_541
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_541
	jmp	.L_last_num_blocks_is_13_541

.L_last_num_blocks_is_11_9_541:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_541
	ja	.L_last_num_blocks_is_11_541
	jmp	.L_last_num_blocks_is_9_541

.L_last_num_blocks_is_7_1_541:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_541
	jb	.L_last_num_blocks_is_3_1_541

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_541
	je	.L_last_num_blocks_is_6_541
	jmp	.L_last_num_blocks_is_5_541

.L_last_num_blocks_is_3_1_541:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_541
	je	.L_last_num_blocks_is_2_541
.L_last_num_blocks_is_1_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_542
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_542

.L_16_blocks_overflow_542:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_542:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_543





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_543
.L_small_initial_partial_block_543:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_543
.L_small_initial_compute_done_543:
.L_after_reduction_543:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_2_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_544
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_544

.L_16_blocks_overflow_544:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_544:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_545





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_545
.L_small_initial_partial_block_545:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_545:

	orq	%r8,%r8
	je	.L_after_reduction_545
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_545:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_3_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_546
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_546

.L_16_blocks_overflow_546:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_546:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_547





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_547
.L_small_initial_partial_block_547:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_547:

	orq	%r8,%r8
	je	.L_after_reduction_547
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_547:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_4_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_548
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_548

.L_16_blocks_overflow_548:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_548:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_549





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_549
.L_small_initial_partial_block_549:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_549:

	orq	%r8,%r8
	je	.L_after_reduction_549
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_549:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_5_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_550
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_550

.L_16_blocks_overflow_550:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_550:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_551





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_551
.L_small_initial_partial_block_551:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_551:

	orq	%r8,%r8
	je	.L_after_reduction_551
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_551:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_6_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_552
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_552

.L_16_blocks_overflow_552:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_552:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_553





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_553
.L_small_initial_partial_block_553:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_553:

	orq	%r8,%r8
	je	.L_after_reduction_553
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_553:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_7_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_554
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_554

.L_16_blocks_overflow_554:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_554:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_555





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_555
.L_small_initial_partial_block_555:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_555:

	orq	%r8,%r8
	je	.L_after_reduction_555
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_555:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_8_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_556
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_556

.L_16_blocks_overflow_556:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_556:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_557





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_557
.L_small_initial_partial_block_557:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_557:

	orq	%r8,%r8
	je	.L_after_reduction_557
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_557:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_9_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_558
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_558

.L_16_blocks_overflow_558:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_558:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_559





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_559
.L_small_initial_partial_block_559:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_559:

	orq	%r8,%r8
	je	.L_after_reduction_559
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_559:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_10_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_560
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_560

.L_16_blocks_overflow_560:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_560:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_561





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_561
.L_small_initial_partial_block_561:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_561:

	orq	%r8,%r8
	je	.L_after_reduction_561
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_561:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_11_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_562
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_562

.L_16_blocks_overflow_562:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_562:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_563





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_563
.L_small_initial_partial_block_563:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_563:

	orq	%r8,%r8
	je	.L_after_reduction_563
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_563:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_12_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_564
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_564

.L_16_blocks_overflow_564:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_564:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_565





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_565
.L_small_initial_partial_block_565:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_565:

	orq	%r8,%r8
	je	.L_after_reduction_565
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_565:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_13_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_566
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_566

.L_16_blocks_overflow_566:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_566:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_567





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_567
.L_small_initial_partial_block_567:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_567:

	orq	%r8,%r8
	je	.L_after_reduction_567
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_567:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_14_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_568
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_568

.L_16_blocks_overflow_568:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_568:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_569





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_569
.L_small_initial_partial_block_569:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_569:

	orq	%r8,%r8
	je	.L_after_reduction_569
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_569:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_15_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_570
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_570

.L_16_blocks_overflow_570:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_570:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_571





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_571
.L_small_initial_partial_block_571:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_571:

	orq	%r8,%r8
	je	.L_after_reduction_571
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_571:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_16_541:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_572
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_572

.L_16_blocks_overflow_572:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_572:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_573:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_573:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_573:
	jmp	.L_last_blocks_done_541
.L_last_num_blocks_is_0_541:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_541:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_497
.L_encrypt_16_blocks_497:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_574
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_574
.L_16_blocks_overflow_574:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_574:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	256(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	320(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	384(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	448(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_575

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_575
	jb	.L_last_num_blocks_is_7_1_575


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_575
	jb	.L_last_num_blocks_is_11_9_575


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_575
	ja	.L_last_num_blocks_is_16_575
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_575
	jmp	.L_last_num_blocks_is_13_575

.L_last_num_blocks_is_11_9_575:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_575
	ja	.L_last_num_blocks_is_11_575
	jmp	.L_last_num_blocks_is_9_575

.L_last_num_blocks_is_7_1_575:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_575
	jb	.L_last_num_blocks_is_3_1_575

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_575
	je	.L_last_num_blocks_is_6_575
	jmp	.L_last_num_blocks_is_5_575

.L_last_num_blocks_is_3_1_575:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_575
	je	.L_last_num_blocks_is_2_575
.L_last_num_blocks_is_1_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_576
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_576

.L_16_blocks_overflow_576:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_576:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%xmm31,%xmm0,%xmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_577





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_577
.L_small_initial_partial_block_577:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)











	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_577
.L_small_initial_compute_done_577:
.L_after_reduction_577:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_2_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_578
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_578

.L_16_blocks_overflow_578:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_578:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%ymm31,%ymm0,%ymm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_579





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_579
.L_small_initial_partial_block_579:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_579:

	orq	%r8,%r8
	je	.L_after_reduction_579
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_579:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_3_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_580
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_580

.L_16_blocks_overflow_580:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_580:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_581





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_581
.L_small_initial_partial_block_581:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_581:

	orq	%r8,%r8
	je	.L_after_reduction_581
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_581:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_4_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_582
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_582

.L_16_blocks_overflow_582:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_582:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_583





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_583
.L_small_initial_partial_block_583:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_583:

	orq	%r8,%r8
	je	.L_after_reduction_583
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_583:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_5_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_584
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_584

.L_16_blocks_overflow_584:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_584:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_585





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_585
.L_small_initial_partial_block_585:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_585:

	orq	%r8,%r8
	je	.L_after_reduction_585
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_585:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_6_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_586
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_586

.L_16_blocks_overflow_586:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_586:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_587





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_587
.L_small_initial_partial_block_587:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_587:

	orq	%r8,%r8
	je	.L_after_reduction_587
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_587:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_7_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_588
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_588

.L_16_blocks_overflow_588:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_588:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_589





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_589
.L_small_initial_partial_block_589:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_589:

	orq	%r8,%r8
	je	.L_after_reduction_589
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_589:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_8_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_590
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_590

.L_16_blocks_overflow_590:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_590:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_591





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_591
.L_small_initial_partial_block_591:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_591:

	orq	%r8,%r8
	je	.L_after_reduction_591
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_591:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_9_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_592
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_592

.L_16_blocks_overflow_592:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_592:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_593





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_593
.L_small_initial_partial_block_593:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_593:

	orq	%r8,%r8
	je	.L_after_reduction_593
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_593:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_10_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_594
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_594

.L_16_blocks_overflow_594:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_594:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_595





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_595
.L_small_initial_partial_block_595:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_595:

	orq	%r8,%r8
	je	.L_after_reduction_595
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_595:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_11_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_596
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_596

.L_16_blocks_overflow_596:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_596:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_597





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_597
.L_small_initial_partial_block_597:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_597:

	orq	%r8,%r8
	je	.L_after_reduction_597
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_597:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_12_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_598
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_598

.L_16_blocks_overflow_598:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_598:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_599





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_599
.L_small_initial_partial_block_599:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_599:

	orq	%r8,%r8
	je	.L_after_reduction_599
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_599:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_13_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_600
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_600

.L_16_blocks_overflow_600:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_600:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_601





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_601
.L_small_initial_partial_block_601:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_601:

	orq	%r8,%r8
	je	.L_after_reduction_601
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_601:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_14_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_602
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_602

.L_16_blocks_overflow_602:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_602:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_603





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_603
.L_small_initial_partial_block_603:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_603:

	orq	%r8,%r8
	je	.L_after_reduction_603
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_603:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_15_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_604
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_604

.L_16_blocks_overflow_604:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_604:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_605





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_605
.L_small_initial_partial_block_605:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_605:

	orq	%r8,%r8
	je	.L_after_reduction_605
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_605:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_16_575:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_606
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_606

.L_16_blocks_overflow_606:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_606:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_607:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_607:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_607:
	jmp	.L_last_blocks_done_575
.L_last_num_blocks_is_0_575:
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_575:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_497

.L_message_below_32_blocks_497:


	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_608
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)
.L_skip_hkeys_precomputation_608:
	movq	$1,%r14
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_609

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_609
	jb	.L_last_num_blocks_is_7_1_609


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_609
	jb	.L_last_num_blocks_is_11_9_609


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_609
	ja	.L_last_num_blocks_is_16_609
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_609
	jmp	.L_last_num_blocks_is_13_609

.L_last_num_blocks_is_11_9_609:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_609
	ja	.L_last_num_blocks_is_11_609
	jmp	.L_last_num_blocks_is_9_609

.L_last_num_blocks_is_7_1_609:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_609
	jb	.L_last_num_blocks_is_3_1_609

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_609
	je	.L_last_num_blocks_is_6_609
	jmp	.L_last_num_blocks_is_5_609

.L_last_num_blocks_is_3_1_609:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_609
	je	.L_last_num_blocks_is_2_609
.L_last_num_blocks_is_1_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_610
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_610

.L_16_blocks_overflow_610:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_610:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_611





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_611
.L_small_initial_partial_block_611:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_611
.L_small_initial_compute_done_611:
.L_after_reduction_611:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_2_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_612
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_612

.L_16_blocks_overflow_612:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_612:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_613





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_613
.L_small_initial_partial_block_613:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_613:

	orq	%r8,%r8
	je	.L_after_reduction_613
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_613:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_3_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_614
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_614

.L_16_blocks_overflow_614:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_614:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_615





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_615
.L_small_initial_partial_block_615:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_615:

	orq	%r8,%r8
	je	.L_after_reduction_615
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_615:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_4_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_616
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_616

.L_16_blocks_overflow_616:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_616:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_617





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_617
.L_small_initial_partial_block_617:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_617:

	orq	%r8,%r8
	je	.L_after_reduction_617
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_617:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_5_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_618
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_618

.L_16_blocks_overflow_618:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_618:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_619





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_619
.L_small_initial_partial_block_619:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_619:

	orq	%r8,%r8
	je	.L_after_reduction_619
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_619:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_6_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_620
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_620

.L_16_blocks_overflow_620:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_620:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_621





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_621
.L_small_initial_partial_block_621:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_621:

	orq	%r8,%r8
	je	.L_after_reduction_621
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_621:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_7_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_622
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_622

.L_16_blocks_overflow_622:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_622:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_623





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_623
.L_small_initial_partial_block_623:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_623:

	orq	%r8,%r8
	je	.L_after_reduction_623
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_623:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_8_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_624
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_624

.L_16_blocks_overflow_624:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_624:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_625





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_625
.L_small_initial_partial_block_625:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_625:

	orq	%r8,%r8
	je	.L_after_reduction_625
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_625:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_9_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_626
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_626

.L_16_blocks_overflow_626:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_626:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_627





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_627
.L_small_initial_partial_block_627:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_627:

	orq	%r8,%r8
	je	.L_after_reduction_627
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_627:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_10_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_628
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_628

.L_16_blocks_overflow_628:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_628:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_629





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_629
.L_small_initial_partial_block_629:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_629:

	orq	%r8,%r8
	je	.L_after_reduction_629
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_629:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_11_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_630
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_630

.L_16_blocks_overflow_630:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_630:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_631





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_631
.L_small_initial_partial_block_631:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_631:

	orq	%r8,%r8
	je	.L_after_reduction_631
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_631:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_12_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_632
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_632

.L_16_blocks_overflow_632:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_632:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_633





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_633
.L_small_initial_partial_block_633:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_633:

	orq	%r8,%r8
	je	.L_after_reduction_633
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_633:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_13_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_634
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_634

.L_16_blocks_overflow_634:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_634:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_635





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_635
.L_small_initial_partial_block_635:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_635:

	orq	%r8,%r8
	je	.L_after_reduction_635
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_635:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_14_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_636
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_636

.L_16_blocks_overflow_636:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_636:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_637





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_637
.L_small_initial_partial_block_637:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_637:

	orq	%r8,%r8
	je	.L_after_reduction_637
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_637:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_15_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_638
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_638

.L_16_blocks_overflow_638:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_638:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_639





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_639
.L_small_initial_partial_block_639:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_639:

	orq	%r8,%r8
	je	.L_after_reduction_639
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_639:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_16_609:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_640
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_640

.L_16_blocks_overflow_640:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_640:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_641:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_641:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_641:
	jmp	.L_last_blocks_done_609
.L_last_num_blocks_is_0_609:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_609:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_497

.L_message_below_equal_16_blocks_497:


	movl	%r8d,%r12d
	addl	$15,%r12d
	shrl	$4,%r12d
	cmpq	$8,%r12
	je	.L_small_initial_num_blocks_is_8_642
	jl	.L_small_initial_num_blocks_is_7_1_642


	cmpq	$12,%r12
	je	.L_small_initial_num_blocks_is_12_642
	jl	.L_small_initial_num_blocks_is_11_9_642


	cmpq	$16,%r12
	je	.L_small_initial_num_blocks_is_16_642
	cmpq	$15,%r12
	je	.L_small_initial_num_blocks_is_15_642
	cmpq	$14,%r12
	je	.L_small_initial_num_blocks_is_14_642
	jmp	.L_small_initial_num_blocks_is_13_642

.L_small_initial_num_blocks_is_11_9_642:

	cmpq	$11,%r12
	je	.L_small_initial_num_blocks_is_11_642
	cmpq	$10,%r12
	je	.L_small_initial_num_blocks_is_10_642
	jmp	.L_small_initial_num_blocks_is_9_642

.L_small_initial_num_blocks_is_7_1_642:
	cmpq	$4,%r12
	je	.L_small_initial_num_blocks_is_4_642
	jl	.L_small_initial_num_blocks_is_3_1_642

	cmpq	$7,%r12
	je	.L_small_initial_num_blocks_is_7_642
	cmpq	$6,%r12
	je	.L_small_initial_num_blocks_is_6_642
	jmp	.L_small_initial_num_blocks_is_5_642

.L_small_initial_num_blocks_is_3_1_642:

	cmpq	$3,%r12
	je	.L_small_initial_num_blocks_is_3_642
	cmpq	$2,%r12
	je	.L_small_initial_num_blocks_is_2_642





.L_small_initial_num_blocks_is_1_642:
	vmovdqa64	SHUF_MASK(%rip),%xmm29
	vpaddd	ONE(%rip),%xmm2,%xmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm0,%xmm2
	vpshufb	%xmm29,%xmm0,%xmm0
	vmovdqu8	0(%rcx,%r11,1),%xmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%xmm15,%xmm0,%xmm0
	vpxorq	%xmm6,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm6,%xmm6
	vextracti32x4	$0,%zmm6,%xmm13


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_643





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_643
.L_small_initial_partial_block_643:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)











	vpxorq	%xmm13,%xmm14,%xmm14

	jmp	.L_after_reduction_643
.L_small_initial_compute_done_643:
.L_after_reduction_643:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_2_642:
	vmovdqa64	SHUF_MASK(%rip),%ymm29
	vshufi64x2	$0,%ymm2,%ymm2,%ymm0
	vpaddd	ddq_add_1234(%rip),%ymm0,%ymm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm0,%xmm2
	vpshufb	%ymm29,%ymm0,%ymm0
	vmovdqu8	0(%rcx,%r11,1),%ymm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%ymm15,%ymm0,%ymm0
	vpxorq	%ymm6,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm6,%ymm6
	vextracti32x4	$1,%zmm6,%xmm13
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_644





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_644
.L_small_initial_partial_block_644:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_644:

	orq	%r8,%r8
	je	.L_after_reduction_644
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_644:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_3_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vextracti32x4	$2,%zmm6,%xmm13
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_645





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_645
.L_small_initial_partial_block_645:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_645:

	orq	%r8,%r8
	je	.L_after_reduction_645
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_645:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_4_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vextracti32x4	$3,%zmm6,%xmm13
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_646





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_646
.L_small_initial_partial_block_646:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_646:

	orq	%r8,%r8
	je	.L_after_reduction_646
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_646:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_5_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%xmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%xmm15,%xmm3,%xmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%xmm7,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%xmm29,%xmm7,%xmm7
	vextracti32x4	$0,%zmm7,%xmm13
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_647





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_647
.L_small_initial_partial_block_647:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_647:

	orq	%r8,%r8
	je	.L_after_reduction_647
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_647:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_6_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%ymm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%ymm15,%ymm3,%ymm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%ymm7,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%ymm29,%ymm7,%ymm7
	vextracti32x4	$1,%zmm7,%xmm13
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_648





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_648
.L_small_initial_partial_block_648:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_648:

	orq	%r8,%r8
	je	.L_after_reduction_648
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_648:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_7_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vextracti32x4	$2,%zmm7,%xmm13
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_649





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_649
.L_small_initial_partial_block_649:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_649:

	orq	%r8,%r8
	je	.L_after_reduction_649
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_649:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_8_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vextracti32x4	$3,%zmm7,%xmm13
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_650





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_650
.L_small_initial_partial_block_650:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_650:

	orq	%r8,%r8
	je	.L_after_reduction_650
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_650:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_9_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%xmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%xmm15,%xmm4,%xmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%xmm10,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%xmm29,%xmm10,%xmm10
	vextracti32x4	$0,%zmm10,%xmm13
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_651





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_651
.L_small_initial_partial_block_651:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_651:

	orq	%r8,%r8
	je	.L_after_reduction_651
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_651:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_10_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%ymm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%ymm15,%ymm4,%ymm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%ymm10,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%ymm29,%ymm10,%ymm10
	vextracti32x4	$1,%zmm10,%xmm13
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_652





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_652
.L_small_initial_partial_block_652:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_652:

	orq	%r8,%r8
	je	.L_after_reduction_652
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_652:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_11_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vextracti32x4	$2,%zmm10,%xmm13
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_653





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_653
.L_small_initial_partial_block_653:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_653:

	orq	%r8,%r8
	je	.L_after_reduction_653
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_653:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_12_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vextracti32x4	$3,%zmm10,%xmm13
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_654





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_654
.L_small_initial_partial_block_654:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_654:

	orq	%r8,%r8
	je	.L_after_reduction_654
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_654:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_13_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%xmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%xmm15,%xmm5,%xmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%xmm11,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%xmm29,%xmm11,%xmm11
	vextracti32x4	$0,%zmm11,%xmm13
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_655





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_655
.L_small_initial_partial_block_655:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_655:

	orq	%r8,%r8
	je	.L_after_reduction_655
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_655:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_14_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%ymm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%ymm15,%ymm5,%ymm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%ymm11,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%ymm29,%ymm11,%ymm11
	vextracti32x4	$1,%zmm11,%xmm13
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_656





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_656
.L_small_initial_partial_block_656:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_656:

	orq	%r8,%r8
	je	.L_after_reduction_656
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_656:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_15_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vextracti32x4	$2,%zmm11,%xmm13
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_657





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_657
.L_small_initial_partial_block_657:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_657:

	orq	%r8,%r8
	je	.L_after_reduction_657
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_657:
	jmp	.L_small_initial_blocks_encrypted_642
.L_small_initial_num_blocks_is_16_642:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vextracti32x4	$3,%zmm11,%xmm13
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_658:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_658:
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_658:
.L_small_initial_blocks_encrypted_642:
.L_ghash_done_497:
	vmovdqu64	%xmm2,0(%rsi)
	vmovdqu64	%xmm14,64(%rsi)
.L_enc_dec_done_497:
	jmp	.Lexit_gcm_decrypt
.align	32
.Laes_gcm_decrypt_192_avx512:
	orq	%r8,%r8
	je	.L_enc_dec_done_659
	xorq	%r14,%r14
	vmovdqu64	64(%rsi),%xmm14

	movq	(%rdx),%r11
	orq	%r11,%r11
	je	.L_partial_block_done_660
	movl	$16,%r10d
	leaq	byte_len_to_mask_table(%rip),%r12
	cmpq	%r10,%r8
	cmovcq	%r8,%r10
	kmovw	(%r12,%r10,2),%k1
	vmovdqu8	(%rcx),%xmm0{%k1}{z}

	vmovdqu64	16(%rsi),%xmm3
	vmovdqu64	336(%rsi),%xmm4



	leaq	SHIFT_MASK(%rip),%r12
	addq	%r11,%r12
	vmovdqu64	(%r12),%xmm5
	vpshufb	%xmm5,%xmm3,%xmm3

	vmovdqa64	%xmm0,%xmm6
	vpxorq	%xmm0,%xmm3,%xmm3


	leaq	(%r8,%r11,1),%r13
	subq	$16,%r13
	jge	.L_no_extra_mask_660
	subq	%r13,%r12
.L_no_extra_mask_660:



	vmovdqu64	16(%r12),%xmm0
	vpand	%xmm0,%xmm3,%xmm3
	vpand	%xmm0,%xmm6,%xmm6
	vpshufb	SHUF_MASK(%rip),%xmm6,%xmm6
	vpshufb	%xmm5,%xmm6,%xmm6
	vpxorq	%xmm6,%xmm14,%xmm14
	cmpq	$0,%r13
	jl	.L_partial_incomplete_660

	vpclmulqdq	$0x11,%xmm4,%xmm14,%xmm7
	vpclmulqdq	$0x00,%xmm4,%xmm14,%xmm10
	vpclmulqdq	$0x01,%xmm4,%xmm14,%xmm11
	vpclmulqdq	$0x10,%xmm4,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm14,%xmm14

	vpsrldq	$8,%xmm14,%xmm11
	vpslldq	$8,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm7,%xmm7
	vpxorq	%xmm10,%xmm14,%xmm14



	vmovdqu64	POLY2(%rip),%xmm11

	vpclmulqdq	$0x01,%xmm14,%xmm11,%xmm10
	vpslldq	$8,%xmm10,%xmm10
	vpxorq	%xmm10,%xmm14,%xmm14



	vpclmulqdq	$0x00,%xmm14,%xmm11,%xmm10
	vpsrldq	$4,%xmm10,%xmm10
	vpclmulqdq	$0x10,%xmm14,%xmm11,%xmm14
	vpslldq	$4,%xmm14,%xmm14

	vpternlogq	$0x96,%xmm10,%xmm7,%xmm14

	movq	$0,(%rdx)

	movq	%r11,%r12
	movq	$16,%r11
	subq	%r12,%r11
	jmp	.L_enc_dec_done_660

.L_partial_incomplete_660:
	addq	%r8,(%rdx)
	movq	%r8,%r11

.L_enc_dec_done_660:


	leaq	byte_len_to_mask_table(%rip),%r12
	kmovw	(%r12,%r11,2),%k1
	vmovdqu64	%xmm14,64(%rsi)
	movq	%r9,%r12
	vmovdqu8	%xmm3,(%r12){%k1}
.L_partial_block_done_660:
	vmovdqu64	0(%rsi),%xmm2
	subq	%r11,%r8
	je	.L_enc_dec_done_659
	cmpq	$256,%r8
	jbe	.L_message_below_equal_16_blocks_659

	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vmovdqa64	ddq_addbe_4444(%rip),%zmm27
	vmovdqa64	ddq_addbe_1234(%rip),%zmm28






	vmovd	%xmm2,%r15d
	andl	$255,%r15d

	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpshufb	%zmm29,%zmm2,%zmm2



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_661
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_661
.L_next_16_overflow_661:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_661:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	0(%rcx,%r11,1),%zmm0
	vmovdqu8	64(%rcx,%r11,1),%zmm3
	vmovdqu8	128(%rcx,%r11,1),%zmm4
	vmovdqu8	192(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	176(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	192(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,0(%r10,%r11,1)
	vmovdqu8	%zmm10,64(%r10,%r11,1)
	vmovdqu8	%zmm11,128(%r10,%r11,1)
	vmovdqu8	%zmm12,192(%r10,%r11,1)

	vpshufb	%zmm29,%zmm0,%zmm7
	vpshufb	%zmm29,%zmm3,%zmm10
	vpshufb	%zmm29,%zmm4,%zmm11
	vpshufb	%zmm29,%zmm5,%zmm12
	vmovdqa64	%zmm7,768(%rsp)
	vmovdqa64	%zmm10,832(%rsp)
	vmovdqa64	%zmm11,896(%rsp)
	vmovdqa64	%zmm12,960(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_662

	vmovdqu64	288(%rsi),%zmm0
	vmovdqu64	%zmm0,704(%rsp)

	vmovdqu64	224(%rsi),%zmm3
	vmovdqu64	%zmm3,640(%rsp)


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	160(%rsi),%zmm4
	vmovdqu64	%zmm4,576(%rsp)

	vmovdqu64	96(%rsi),%zmm5
	vmovdqu64	%zmm5,512(%rsp)
.L_skip_hkeys_precomputation_662:
	cmpq	$512,%r8
	jb	.L_message_below_32_blocks_659



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_663
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_663
.L_next_16_overflow_663:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_663:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	256(%rcx,%r11,1),%zmm0
	vmovdqu8	320(%rcx,%r11,1),%zmm3
	vmovdqu8	384(%rcx,%r11,1),%zmm4
	vmovdqu8	448(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	176(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	192(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,256(%r10,%r11,1)
	vmovdqu8	%zmm10,320(%r10,%r11,1)
	vmovdqu8	%zmm11,384(%r10,%r11,1)
	vmovdqu8	%zmm12,448(%r10,%r11,1)

	vpshufb	%zmm29,%zmm0,%zmm7
	vpshufb	%zmm29,%zmm3,%zmm10
	vpshufb	%zmm29,%zmm4,%zmm11
	vpshufb	%zmm29,%zmm5,%zmm12
	vmovdqa64	%zmm7,1024(%rsp)
	vmovdqa64	%zmm10,1088(%rsp)
	vmovdqa64	%zmm11,1152(%rsp)
	vmovdqa64	%zmm12,1216(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_664
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,192(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,128(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,64(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,0(%rsp)
.L_skip_hkeys_precomputation_664:
	movq	$1,%r14
	addq	$512,%r11
	subq	$512,%r8

	cmpq	$768,%r8
	jb	.L_no_more_big_nblocks_659
.L_encrypt_big_nblocks_659:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_665
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_665
.L_16_blocks_overflow_665:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_665:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_666
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_666
.L_16_blocks_overflow_666:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_666:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_667
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_667
.L_16_blocks_overflow_667:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_667:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	512(%rcx,%r11,1),%zmm17
	vmovdqu8	576(%rcx,%r11,1),%zmm19
	vmovdqu8	640(%rcx,%r11,1),%zmm20
	vmovdqu8	704(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30


	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10

	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpternlogq	$0x96,%zmm15,%zmm12,%zmm6
	vpxorq	%zmm24,%zmm6,%zmm6
	vpternlogq	$0x96,%zmm10,%zmm13,%zmm7
	vpxorq	%zmm25,%zmm7,%zmm7
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vextracti64x4	$1,%zmm6,%ymm12
	vpxorq	%ymm12,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm12
	vpxorq	%xmm12,%xmm6,%xmm6
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm6
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,512(%r10,%r11,1)
	vmovdqu8	%zmm3,576(%r10,%r11,1)
	vmovdqu8	%zmm4,640(%r10,%r11,1)
	vmovdqu8	%zmm5,704(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1024(%rsp)
	vmovdqa64	%zmm3,1088(%rsp)
	vmovdqa64	%zmm4,1152(%rsp)
	vmovdqa64	%zmm5,1216(%rsp)
	vmovdqa64	%zmm6,%zmm14

	addq	$768,%r11
	subq	$768,%r8
	cmpq	$768,%r8
	jae	.L_encrypt_big_nblocks_659

.L_no_more_big_nblocks_659:

	cmpq	$512,%r8
	jae	.L_encrypt_32_blocks_659

	cmpq	$256,%r8
	jae	.L_encrypt_16_blocks_659
.L_encrypt_0_blocks_ghash_32_659:
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$256,%ebx
	subl	%r10d,%ebx
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	addl	$256,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_668

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_668
	jb	.L_last_num_blocks_is_7_1_668


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_668
	jb	.L_last_num_blocks_is_11_9_668


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_668
	ja	.L_last_num_blocks_is_16_668
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_668
	jmp	.L_last_num_blocks_is_13_668

.L_last_num_blocks_is_11_9_668:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_668
	ja	.L_last_num_blocks_is_11_668
	jmp	.L_last_num_blocks_is_9_668

.L_last_num_blocks_is_7_1_668:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_668
	jb	.L_last_num_blocks_is_3_1_668

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_668
	je	.L_last_num_blocks_is_6_668
	jmp	.L_last_num_blocks_is_5_668

.L_last_num_blocks_is_3_1_668:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_668
	je	.L_last_num_blocks_is_2_668
.L_last_num_blocks_is_1_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_669
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_669

.L_16_blocks_overflow_669:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_669:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_670





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_670
.L_small_initial_partial_block_670:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_670
.L_small_initial_compute_done_670:
.L_after_reduction_670:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_2_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_671
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_671

.L_16_blocks_overflow_671:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_671:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_672





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_672
.L_small_initial_partial_block_672:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_672:

	orq	%r8,%r8
	je	.L_after_reduction_672
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_672:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_3_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_673
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_673

.L_16_blocks_overflow_673:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_673:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_674





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_674
.L_small_initial_partial_block_674:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_674:

	orq	%r8,%r8
	je	.L_after_reduction_674
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_674:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_4_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_675
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_675

.L_16_blocks_overflow_675:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_675:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_676





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_676
.L_small_initial_partial_block_676:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_676:

	orq	%r8,%r8
	je	.L_after_reduction_676
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_676:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_5_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_677
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_677

.L_16_blocks_overflow_677:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_677:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_678





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_678
.L_small_initial_partial_block_678:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_678:

	orq	%r8,%r8
	je	.L_after_reduction_678
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_678:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_6_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_679
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_679

.L_16_blocks_overflow_679:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_679:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_680





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_680
.L_small_initial_partial_block_680:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_680:

	orq	%r8,%r8
	je	.L_after_reduction_680
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_680:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_7_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_681
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_681

.L_16_blocks_overflow_681:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_681:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_682





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_682
.L_small_initial_partial_block_682:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_682:

	orq	%r8,%r8
	je	.L_after_reduction_682
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_682:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_8_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_683
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_683

.L_16_blocks_overflow_683:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_683:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_684





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_684
.L_small_initial_partial_block_684:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_684:

	orq	%r8,%r8
	je	.L_after_reduction_684
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_684:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_9_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_685
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_685

.L_16_blocks_overflow_685:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_685:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_686





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_686
.L_small_initial_partial_block_686:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_686:

	orq	%r8,%r8
	je	.L_after_reduction_686
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_686:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_10_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_687
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_687

.L_16_blocks_overflow_687:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_687:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_688





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_688
.L_small_initial_partial_block_688:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_688:

	orq	%r8,%r8
	je	.L_after_reduction_688
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_688:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_11_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_689
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_689

.L_16_blocks_overflow_689:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_689:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_690





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_690
.L_small_initial_partial_block_690:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_690:

	orq	%r8,%r8
	je	.L_after_reduction_690
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_690:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_12_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_691
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_691

.L_16_blocks_overflow_691:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_691:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_692





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_692
.L_small_initial_partial_block_692:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_692:

	orq	%r8,%r8
	je	.L_after_reduction_692
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_692:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_13_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_693
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_693

.L_16_blocks_overflow_693:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_693:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_694





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_694
.L_small_initial_partial_block_694:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_694:

	orq	%r8,%r8
	je	.L_after_reduction_694
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_694:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_14_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_695
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_695

.L_16_blocks_overflow_695:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_695:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_696





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_696
.L_small_initial_partial_block_696:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_696:

	orq	%r8,%r8
	je	.L_after_reduction_696
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_696:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_15_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_697
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_697

.L_16_blocks_overflow_697:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_697:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_698





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_698
.L_small_initial_partial_block_698:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_698:

	orq	%r8,%r8
	je	.L_after_reduction_698
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_698:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_16_668:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_699
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_699

.L_16_blocks_overflow_699:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_699:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_700:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_700:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_700:
	jmp	.L_last_blocks_done_668
.L_last_num_blocks_is_0_668:
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_668:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_659
.L_encrypt_32_blocks_659:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_701
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_701
.L_16_blocks_overflow_701:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_701:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_702
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_702
.L_16_blocks_overflow_702:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_702:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

	subq	$512,%r8
	addq	$512,%r11
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_703

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_703
	jb	.L_last_num_blocks_is_7_1_703


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_703
	jb	.L_last_num_blocks_is_11_9_703


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_703
	ja	.L_last_num_blocks_is_16_703
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_703
	jmp	.L_last_num_blocks_is_13_703

.L_last_num_blocks_is_11_9_703:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_703
	ja	.L_last_num_blocks_is_11_703
	jmp	.L_last_num_blocks_is_9_703

.L_last_num_blocks_is_7_1_703:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_703
	jb	.L_last_num_blocks_is_3_1_703

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_703
	je	.L_last_num_blocks_is_6_703
	jmp	.L_last_num_blocks_is_5_703

.L_last_num_blocks_is_3_1_703:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_703
	je	.L_last_num_blocks_is_2_703
.L_last_num_blocks_is_1_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_704
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_704

.L_16_blocks_overflow_704:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_704:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_705





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_705
.L_small_initial_partial_block_705:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_705
.L_small_initial_compute_done_705:
.L_after_reduction_705:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_2_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_706
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_706

.L_16_blocks_overflow_706:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_706:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_707





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_707
.L_small_initial_partial_block_707:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_707:

	orq	%r8,%r8
	je	.L_after_reduction_707
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_707:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_3_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_708
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_708

.L_16_blocks_overflow_708:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_708:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_709





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_709
.L_small_initial_partial_block_709:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_709:

	orq	%r8,%r8
	je	.L_after_reduction_709
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_709:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_4_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_710
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_710

.L_16_blocks_overflow_710:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_710:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_711





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_711
.L_small_initial_partial_block_711:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_711:

	orq	%r8,%r8
	je	.L_after_reduction_711
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_711:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_5_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_712
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_712

.L_16_blocks_overflow_712:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_712:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_713





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_713
.L_small_initial_partial_block_713:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_713:

	orq	%r8,%r8
	je	.L_after_reduction_713
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_713:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_6_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_714
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_714

.L_16_blocks_overflow_714:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_714:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_715





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_715
.L_small_initial_partial_block_715:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_715:

	orq	%r8,%r8
	je	.L_after_reduction_715
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_715:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_7_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_716
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_716

.L_16_blocks_overflow_716:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_716:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_717





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_717
.L_small_initial_partial_block_717:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_717:

	orq	%r8,%r8
	je	.L_after_reduction_717
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_717:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_8_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_718
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_718

.L_16_blocks_overflow_718:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_718:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_719





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_719
.L_small_initial_partial_block_719:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_719:

	orq	%r8,%r8
	je	.L_after_reduction_719
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_719:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_9_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_720
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_720

.L_16_blocks_overflow_720:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_720:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_721





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_721
.L_small_initial_partial_block_721:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_721:

	orq	%r8,%r8
	je	.L_after_reduction_721
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_721:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_10_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_722
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_722

.L_16_blocks_overflow_722:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_722:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_723





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_723
.L_small_initial_partial_block_723:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_723:

	orq	%r8,%r8
	je	.L_after_reduction_723
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_723:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_11_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_724
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_724

.L_16_blocks_overflow_724:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_724:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_725





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_725
.L_small_initial_partial_block_725:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_725:

	orq	%r8,%r8
	je	.L_after_reduction_725
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_725:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_12_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_726
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_726

.L_16_blocks_overflow_726:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_726:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_727





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_727
.L_small_initial_partial_block_727:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_727:

	orq	%r8,%r8
	je	.L_after_reduction_727
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_727:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_13_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_728
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_728

.L_16_blocks_overflow_728:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_728:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_729





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_729
.L_small_initial_partial_block_729:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_729:

	orq	%r8,%r8
	je	.L_after_reduction_729
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_729:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_14_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_730
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_730

.L_16_blocks_overflow_730:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_730:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_731





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_731
.L_small_initial_partial_block_731:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_731:

	orq	%r8,%r8
	je	.L_after_reduction_731
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_731:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_15_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_732
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_732

.L_16_blocks_overflow_732:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_732:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_733





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_733
.L_small_initial_partial_block_733:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_733:

	orq	%r8,%r8
	je	.L_after_reduction_733
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_733:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_16_703:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_734
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_734

.L_16_blocks_overflow_734:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_734:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_735:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_735:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_735:
	jmp	.L_last_blocks_done_703
.L_last_num_blocks_is_0_703:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_703:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_659
.L_encrypt_16_blocks_659:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_736
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_736
.L_16_blocks_overflow_736:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_736:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	256(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	320(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	384(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	448(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_737

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_737
	jb	.L_last_num_blocks_is_7_1_737


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_737
	jb	.L_last_num_blocks_is_11_9_737


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_737
	ja	.L_last_num_blocks_is_16_737
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_737
	jmp	.L_last_num_blocks_is_13_737

.L_last_num_blocks_is_11_9_737:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_737
	ja	.L_last_num_blocks_is_11_737
	jmp	.L_last_num_blocks_is_9_737

.L_last_num_blocks_is_7_1_737:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_737
	jb	.L_last_num_blocks_is_3_1_737

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_737
	je	.L_last_num_blocks_is_6_737
	jmp	.L_last_num_blocks_is_5_737

.L_last_num_blocks_is_3_1_737:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_737
	je	.L_last_num_blocks_is_2_737
.L_last_num_blocks_is_1_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_738
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_738

.L_16_blocks_overflow_738:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_738:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%xmm31,%xmm0,%xmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_739





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_739
.L_small_initial_partial_block_739:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)











	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_739
.L_small_initial_compute_done_739:
.L_after_reduction_739:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_2_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_740
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_740

.L_16_blocks_overflow_740:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_740:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%ymm31,%ymm0,%ymm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_741





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_741
.L_small_initial_partial_block_741:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_741:

	orq	%r8,%r8
	je	.L_after_reduction_741
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_741:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_3_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_742
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_742

.L_16_blocks_overflow_742:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_742:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_743





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_743
.L_small_initial_partial_block_743:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_743:

	orq	%r8,%r8
	je	.L_after_reduction_743
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_743:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_4_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_744
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_744

.L_16_blocks_overflow_744:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_744:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_745





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_745
.L_small_initial_partial_block_745:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_745:

	orq	%r8,%r8
	je	.L_after_reduction_745
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_745:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_5_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_746
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_746

.L_16_blocks_overflow_746:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_746:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_747





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_747
.L_small_initial_partial_block_747:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_747:

	orq	%r8,%r8
	je	.L_after_reduction_747
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_747:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_6_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_748
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_748

.L_16_blocks_overflow_748:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_748:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_749





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_749
.L_small_initial_partial_block_749:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_749:

	orq	%r8,%r8
	je	.L_after_reduction_749
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_749:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_7_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_750
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_750

.L_16_blocks_overflow_750:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_750:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_751





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_751
.L_small_initial_partial_block_751:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_751:

	orq	%r8,%r8
	je	.L_after_reduction_751
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_751:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_8_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_752
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_752

.L_16_blocks_overflow_752:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_752:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_753





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_753
.L_small_initial_partial_block_753:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_753:

	orq	%r8,%r8
	je	.L_after_reduction_753
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_753:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_9_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_754
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_754

.L_16_blocks_overflow_754:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_754:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_755





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_755
.L_small_initial_partial_block_755:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_755:

	orq	%r8,%r8
	je	.L_after_reduction_755
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_755:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_10_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_756
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_756

.L_16_blocks_overflow_756:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_756:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_757





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_757
.L_small_initial_partial_block_757:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_757:

	orq	%r8,%r8
	je	.L_after_reduction_757
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_757:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_11_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_758
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_758

.L_16_blocks_overflow_758:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_758:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_759





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_759
.L_small_initial_partial_block_759:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_759:

	orq	%r8,%r8
	je	.L_after_reduction_759
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_759:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_12_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_760
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_760

.L_16_blocks_overflow_760:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_760:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_761





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_761
.L_small_initial_partial_block_761:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_761:

	orq	%r8,%r8
	je	.L_after_reduction_761
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_761:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_13_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_762
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_762

.L_16_blocks_overflow_762:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_762:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_763





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_763
.L_small_initial_partial_block_763:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_763:

	orq	%r8,%r8
	je	.L_after_reduction_763
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_763:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_14_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_764
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_764

.L_16_blocks_overflow_764:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_764:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_765





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_765
.L_small_initial_partial_block_765:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_765:

	orq	%r8,%r8
	je	.L_after_reduction_765
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_765:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_15_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_766
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_766

.L_16_blocks_overflow_766:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_766:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_767





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_767
.L_small_initial_partial_block_767:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_767:

	orq	%r8,%r8
	je	.L_after_reduction_767
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_767:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_16_737:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_768
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_768

.L_16_blocks_overflow_768:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_768:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_769:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_769:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_769:
	jmp	.L_last_blocks_done_737
.L_last_num_blocks_is_0_737:
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_737:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_659

.L_message_below_32_blocks_659:


	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_770
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)
.L_skip_hkeys_precomputation_770:
	movq	$1,%r14
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_771

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_771
	jb	.L_last_num_blocks_is_7_1_771


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_771
	jb	.L_last_num_blocks_is_11_9_771


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_771
	ja	.L_last_num_blocks_is_16_771
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_771
	jmp	.L_last_num_blocks_is_13_771

.L_last_num_blocks_is_11_9_771:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_771
	ja	.L_last_num_blocks_is_11_771
	jmp	.L_last_num_blocks_is_9_771

.L_last_num_blocks_is_7_1_771:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_771
	jb	.L_last_num_blocks_is_3_1_771

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_771
	je	.L_last_num_blocks_is_6_771
	jmp	.L_last_num_blocks_is_5_771

.L_last_num_blocks_is_3_1_771:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_771
	je	.L_last_num_blocks_is_2_771
.L_last_num_blocks_is_1_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_772
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_772

.L_16_blocks_overflow_772:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_772:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_773





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_773
.L_small_initial_partial_block_773:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_773
.L_small_initial_compute_done_773:
.L_after_reduction_773:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_2_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_774
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_774

.L_16_blocks_overflow_774:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_774:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_775





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_775
.L_small_initial_partial_block_775:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_775:

	orq	%r8,%r8
	je	.L_after_reduction_775
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_775:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_3_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_776
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_776

.L_16_blocks_overflow_776:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_776:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_777





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_777
.L_small_initial_partial_block_777:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_777:

	orq	%r8,%r8
	je	.L_after_reduction_777
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_777:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_4_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_778
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_778

.L_16_blocks_overflow_778:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_778:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_779





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_779
.L_small_initial_partial_block_779:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_779:

	orq	%r8,%r8
	je	.L_after_reduction_779
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_779:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_5_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_780
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_780

.L_16_blocks_overflow_780:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_780:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_781





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_781
.L_small_initial_partial_block_781:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_781:

	orq	%r8,%r8
	je	.L_after_reduction_781
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_781:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_6_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_782
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_782

.L_16_blocks_overflow_782:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_782:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_783





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_783
.L_small_initial_partial_block_783:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_783:

	orq	%r8,%r8
	je	.L_after_reduction_783
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_783:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_7_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_784
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_784

.L_16_blocks_overflow_784:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_784:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_785





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_785
.L_small_initial_partial_block_785:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_785:

	orq	%r8,%r8
	je	.L_after_reduction_785
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_785:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_8_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_786
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_786

.L_16_blocks_overflow_786:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_786:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_787





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_787
.L_small_initial_partial_block_787:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_787:

	orq	%r8,%r8
	je	.L_after_reduction_787
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_787:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_9_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_788
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_788

.L_16_blocks_overflow_788:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_788:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_789





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_789
.L_small_initial_partial_block_789:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_789:

	orq	%r8,%r8
	je	.L_after_reduction_789
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_789:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_10_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_790
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_790

.L_16_blocks_overflow_790:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_790:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_791





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_791
.L_small_initial_partial_block_791:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_791:

	orq	%r8,%r8
	je	.L_after_reduction_791
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_791:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_11_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_792
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_792

.L_16_blocks_overflow_792:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_792:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_793





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_793
.L_small_initial_partial_block_793:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_793:

	orq	%r8,%r8
	je	.L_after_reduction_793
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_793:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_12_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_794
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_794

.L_16_blocks_overflow_794:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_794:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_795





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_795
.L_small_initial_partial_block_795:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_795:

	orq	%r8,%r8
	je	.L_after_reduction_795
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_795:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_13_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_796
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_796

.L_16_blocks_overflow_796:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_796:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_797





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_797
.L_small_initial_partial_block_797:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_797:

	orq	%r8,%r8
	je	.L_after_reduction_797
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_797:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_14_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_798
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_798

.L_16_blocks_overflow_798:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_798:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_799





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_799
.L_small_initial_partial_block_799:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_799:

	orq	%r8,%r8
	je	.L_after_reduction_799
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_799:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_15_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_800
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_800

.L_16_blocks_overflow_800:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_800:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_801





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_801
.L_small_initial_partial_block_801:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_801:

	orq	%r8,%r8
	je	.L_after_reduction_801
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_801:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_16_771:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_802
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_802

.L_16_blocks_overflow_802:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_802:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_803:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_803:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_803:
	jmp	.L_last_blocks_done_771
.L_last_num_blocks_is_0_771:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_771:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_659

.L_message_below_equal_16_blocks_659:


	movl	%r8d,%r12d
	addl	$15,%r12d
	shrl	$4,%r12d
	cmpq	$8,%r12
	je	.L_small_initial_num_blocks_is_8_804
	jl	.L_small_initial_num_blocks_is_7_1_804


	cmpq	$12,%r12
	je	.L_small_initial_num_blocks_is_12_804
	jl	.L_small_initial_num_blocks_is_11_9_804


	cmpq	$16,%r12
	je	.L_small_initial_num_blocks_is_16_804
	cmpq	$15,%r12
	je	.L_small_initial_num_blocks_is_15_804
	cmpq	$14,%r12
	je	.L_small_initial_num_blocks_is_14_804
	jmp	.L_small_initial_num_blocks_is_13_804

.L_small_initial_num_blocks_is_11_9_804:

	cmpq	$11,%r12
	je	.L_small_initial_num_blocks_is_11_804
	cmpq	$10,%r12
	je	.L_small_initial_num_blocks_is_10_804
	jmp	.L_small_initial_num_blocks_is_9_804

.L_small_initial_num_blocks_is_7_1_804:
	cmpq	$4,%r12
	je	.L_small_initial_num_blocks_is_4_804
	jl	.L_small_initial_num_blocks_is_3_1_804

	cmpq	$7,%r12
	je	.L_small_initial_num_blocks_is_7_804
	cmpq	$6,%r12
	je	.L_small_initial_num_blocks_is_6_804
	jmp	.L_small_initial_num_blocks_is_5_804

.L_small_initial_num_blocks_is_3_1_804:

	cmpq	$3,%r12
	je	.L_small_initial_num_blocks_is_3_804
	cmpq	$2,%r12
	je	.L_small_initial_num_blocks_is_2_804





.L_small_initial_num_blocks_is_1_804:
	vmovdqa64	SHUF_MASK(%rip),%xmm29
	vpaddd	ONE(%rip),%xmm2,%xmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm0,%xmm2
	vpshufb	%xmm29,%xmm0,%xmm0
	vmovdqu8	0(%rcx,%r11,1),%xmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%xmm15,%xmm0,%xmm0
	vpxorq	%xmm6,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm6,%xmm6
	vextracti32x4	$0,%zmm6,%xmm13


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_805





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_805
.L_small_initial_partial_block_805:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)











	vpxorq	%xmm13,%xmm14,%xmm14

	jmp	.L_after_reduction_805
.L_small_initial_compute_done_805:
.L_after_reduction_805:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_2_804:
	vmovdqa64	SHUF_MASK(%rip),%ymm29
	vshufi64x2	$0,%ymm2,%ymm2,%ymm0
	vpaddd	ddq_add_1234(%rip),%ymm0,%ymm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm0,%xmm2
	vpshufb	%ymm29,%ymm0,%ymm0
	vmovdqu8	0(%rcx,%r11,1),%ymm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%ymm15,%ymm0,%ymm0
	vpxorq	%ymm6,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm6,%ymm6
	vextracti32x4	$1,%zmm6,%xmm13
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_806





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_806
.L_small_initial_partial_block_806:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_806:

	orq	%r8,%r8
	je	.L_after_reduction_806
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_806:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_3_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vextracti32x4	$2,%zmm6,%xmm13
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_807





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_807
.L_small_initial_partial_block_807:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_807:

	orq	%r8,%r8
	je	.L_after_reduction_807
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_807:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_4_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vextracti32x4	$3,%zmm6,%xmm13
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_808





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_808
.L_small_initial_partial_block_808:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_808:

	orq	%r8,%r8
	je	.L_after_reduction_808
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_808:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_5_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%xmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%xmm15,%xmm3,%xmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%xmm7,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%xmm29,%xmm7,%xmm7
	vextracti32x4	$0,%zmm7,%xmm13
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_809





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_809
.L_small_initial_partial_block_809:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_809:

	orq	%r8,%r8
	je	.L_after_reduction_809
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_809:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_6_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%ymm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%ymm15,%ymm3,%ymm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%ymm7,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%ymm29,%ymm7,%ymm7
	vextracti32x4	$1,%zmm7,%xmm13
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_810





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_810
.L_small_initial_partial_block_810:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_810:

	orq	%r8,%r8
	je	.L_after_reduction_810
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_810:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_7_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vextracti32x4	$2,%zmm7,%xmm13
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_811





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_811
.L_small_initial_partial_block_811:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_811:

	orq	%r8,%r8
	je	.L_after_reduction_811
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_811:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_8_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vextracti32x4	$3,%zmm7,%xmm13
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_812





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_812
.L_small_initial_partial_block_812:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_812:

	orq	%r8,%r8
	je	.L_after_reduction_812
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_812:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_9_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%xmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%xmm15,%xmm4,%xmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%xmm10,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%xmm29,%xmm10,%xmm10
	vextracti32x4	$0,%zmm10,%xmm13
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_813





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_813
.L_small_initial_partial_block_813:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_813:

	orq	%r8,%r8
	je	.L_after_reduction_813
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_813:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_10_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%ymm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%ymm15,%ymm4,%ymm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%ymm10,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%ymm29,%ymm10,%ymm10
	vextracti32x4	$1,%zmm10,%xmm13
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_814





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_814
.L_small_initial_partial_block_814:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_814:

	orq	%r8,%r8
	je	.L_after_reduction_814
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_814:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_11_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vextracti32x4	$2,%zmm10,%xmm13
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_815





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_815
.L_small_initial_partial_block_815:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_815:

	orq	%r8,%r8
	je	.L_after_reduction_815
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_815:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_12_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vextracti32x4	$3,%zmm10,%xmm13
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_816





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_816
.L_small_initial_partial_block_816:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_816:

	orq	%r8,%r8
	je	.L_after_reduction_816
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_816:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_13_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%xmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%xmm15,%xmm5,%xmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%xmm11,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%xmm29,%xmm11,%xmm11
	vextracti32x4	$0,%zmm11,%xmm13
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_817





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_817
.L_small_initial_partial_block_817:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_817:

	orq	%r8,%r8
	je	.L_after_reduction_817
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_817:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_14_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%ymm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%ymm15,%ymm5,%ymm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%ymm11,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%ymm29,%ymm11,%ymm11
	vextracti32x4	$1,%zmm11,%xmm13
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_818





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_818
.L_small_initial_partial_block_818:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_818:

	orq	%r8,%r8
	je	.L_after_reduction_818
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_818:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_15_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vextracti32x4	$2,%zmm11,%xmm13
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_819





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_819
.L_small_initial_partial_block_819:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_819:

	orq	%r8,%r8
	je	.L_after_reduction_819
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_819:
	jmp	.L_small_initial_blocks_encrypted_804
.L_small_initial_num_blocks_is_16_804:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vextracti32x4	$3,%zmm11,%xmm13
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_820:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_820:
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_820:
.L_small_initial_blocks_encrypted_804:
.L_ghash_done_659:
	vmovdqu64	%xmm2,0(%rsi)
	vmovdqu64	%xmm14,64(%rsi)
.L_enc_dec_done_659:
	jmp	.Lexit_gcm_decrypt
.align	32
.Laes_gcm_decrypt_256_avx512:
	orq	%r8,%r8
	je	.L_enc_dec_done_821
	xorq	%r14,%r14
	vmovdqu64	64(%rsi),%xmm14

	movq	(%rdx),%r11
	orq	%r11,%r11
	je	.L_partial_block_done_822
	movl	$16,%r10d
	leaq	byte_len_to_mask_table(%rip),%r12
	cmpq	%r10,%r8
	cmovcq	%r8,%r10
	kmovw	(%r12,%r10,2),%k1
	vmovdqu8	(%rcx),%xmm0{%k1}{z}

	vmovdqu64	16(%rsi),%xmm3
	vmovdqu64	336(%rsi),%xmm4



	leaq	SHIFT_MASK(%rip),%r12
	addq	%r11,%r12
	vmovdqu64	(%r12),%xmm5
	vpshufb	%xmm5,%xmm3,%xmm3

	vmovdqa64	%xmm0,%xmm6
	vpxorq	%xmm0,%xmm3,%xmm3


	leaq	(%r8,%r11,1),%r13
	subq	$16,%r13
	jge	.L_no_extra_mask_822
	subq	%r13,%r12
.L_no_extra_mask_822:



	vmovdqu64	16(%r12),%xmm0
	vpand	%xmm0,%xmm3,%xmm3
	vpand	%xmm0,%xmm6,%xmm6
	vpshufb	SHUF_MASK(%rip),%xmm6,%xmm6
	vpshufb	%xmm5,%xmm6,%xmm6
	vpxorq	%xmm6,%xmm14,%xmm14
	cmpq	$0,%r13
	jl	.L_partial_incomplete_822

	vpclmulqdq	$0x11,%xmm4,%xmm14,%xmm7
	vpclmulqdq	$0x00,%xmm4,%xmm14,%xmm10
	vpclmulqdq	$0x01,%xmm4,%xmm14,%xmm11
	vpclmulqdq	$0x10,%xmm4,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm14,%xmm14

	vpsrldq	$8,%xmm14,%xmm11
	vpslldq	$8,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm7,%xmm7
	vpxorq	%xmm10,%xmm14,%xmm14



	vmovdqu64	POLY2(%rip),%xmm11

	vpclmulqdq	$0x01,%xmm14,%xmm11,%xmm10
	vpslldq	$8,%xmm10,%xmm10
	vpxorq	%xmm10,%xmm14,%xmm14



	vpclmulqdq	$0x00,%xmm14,%xmm11,%xmm10
	vpsrldq	$4,%xmm10,%xmm10
	vpclmulqdq	$0x10,%xmm14,%xmm11,%xmm14
	vpslldq	$4,%xmm14,%xmm14

	vpternlogq	$0x96,%xmm10,%xmm7,%xmm14

	movq	$0,(%rdx)

	movq	%r11,%r12
	movq	$16,%r11
	subq	%r12,%r11
	jmp	.L_enc_dec_done_822

.L_partial_incomplete_822:
	addq	%r8,(%rdx)
	movq	%r8,%r11

.L_enc_dec_done_822:


	leaq	byte_len_to_mask_table(%rip),%r12
	kmovw	(%r12,%r11,2),%k1
	vmovdqu64	%xmm14,64(%rsi)
	movq	%r9,%r12
	vmovdqu8	%xmm3,(%r12){%k1}
.L_partial_block_done_822:
	vmovdqu64	0(%rsi),%xmm2
	subq	%r11,%r8
	je	.L_enc_dec_done_821
	cmpq	$256,%r8
	jbe	.L_message_below_equal_16_blocks_821

	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vmovdqa64	ddq_addbe_4444(%rip),%zmm27
	vmovdqa64	ddq_addbe_1234(%rip),%zmm28






	vmovd	%xmm2,%r15d
	andl	$255,%r15d

	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpshufb	%zmm29,%zmm2,%zmm2



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_823
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_823
.L_next_16_overflow_823:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_823:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	0(%rcx,%r11,1),%zmm0
	vmovdqu8	64(%rcx,%r11,1),%zmm3
	vmovdqu8	128(%rcx,%r11,1),%zmm4
	vmovdqu8	192(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	176(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	192(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	208(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	224(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,0(%r10,%r11,1)
	vmovdqu8	%zmm10,64(%r10,%r11,1)
	vmovdqu8	%zmm11,128(%r10,%r11,1)
	vmovdqu8	%zmm12,192(%r10,%r11,1)

	vpshufb	%zmm29,%zmm0,%zmm7
	vpshufb	%zmm29,%zmm3,%zmm10
	vpshufb	%zmm29,%zmm4,%zmm11
	vpshufb	%zmm29,%zmm5,%zmm12
	vmovdqa64	%zmm7,768(%rsp)
	vmovdqa64	%zmm10,832(%rsp)
	vmovdqa64	%zmm11,896(%rsp)
	vmovdqa64	%zmm12,960(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_824

	vmovdqu64	288(%rsi),%zmm0
	vmovdqu64	%zmm0,704(%rsp)

	vmovdqu64	224(%rsi),%zmm3
	vmovdqu64	%zmm3,640(%rsp)


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	160(%rsi),%zmm4
	vmovdqu64	%zmm4,576(%rsp)

	vmovdqu64	96(%rsi),%zmm5
	vmovdqu64	%zmm5,512(%rsp)
.L_skip_hkeys_precomputation_824:
	cmpq	$512,%r8
	jb	.L_message_below_32_blocks_821



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_825
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_825
.L_next_16_overflow_825:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_825:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	256(%rcx,%r11,1),%zmm0
	vmovdqu8	320(%rcx,%r11,1),%zmm3
	vmovdqu8	384(%rcx,%r11,1),%zmm4
	vmovdqu8	448(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	176(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	192(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	208(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	224(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,256(%r10,%r11,1)
	vmovdqu8	%zmm10,320(%r10,%r11,1)
	vmovdqu8	%zmm11,384(%r10,%r11,1)
	vmovdqu8	%zmm12,448(%r10,%r11,1)

	vpshufb	%zmm29,%zmm0,%zmm7
	vpshufb	%zmm29,%zmm3,%zmm10
	vpshufb	%zmm29,%zmm4,%zmm11
	vpshufb	%zmm29,%zmm5,%zmm12
	vmovdqa64	%zmm7,1024(%rsp)
	vmovdqa64	%zmm10,1088(%rsp)
	vmovdqa64	%zmm11,1152(%rsp)
	vmovdqa64	%zmm12,1216(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_826
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,192(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,128(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,64(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,0(%rsp)
.L_skip_hkeys_precomputation_826:
	movq	$1,%r14
	addq	$512,%r11
	subq	$512,%r8

	cmpq	$768,%r8
	jb	.L_no_more_big_nblocks_821
.L_encrypt_big_nblocks_821:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_827
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_827
.L_16_blocks_overflow_827:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_827:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_828
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_828
.L_16_blocks_overflow_828:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_828:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_829
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_829
.L_16_blocks_overflow_829:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_829:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	512(%rcx,%r11,1),%zmm17
	vmovdqu8	576(%rcx,%r11,1),%zmm19
	vmovdqu8	640(%rcx,%r11,1),%zmm20
	vmovdqu8	704(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30


	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10

	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpternlogq	$0x96,%zmm15,%zmm12,%zmm6
	vpxorq	%zmm24,%zmm6,%zmm6
	vpternlogq	$0x96,%zmm10,%zmm13,%zmm7
	vpxorq	%zmm25,%zmm7,%zmm7
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vextracti64x4	$1,%zmm6,%ymm12
	vpxorq	%ymm12,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm12
	vpxorq	%xmm12,%xmm6,%xmm6
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm6
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,512(%r10,%r11,1)
	vmovdqu8	%zmm3,576(%r10,%r11,1)
	vmovdqu8	%zmm4,640(%r10,%r11,1)
	vmovdqu8	%zmm5,704(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1024(%rsp)
	vmovdqa64	%zmm3,1088(%rsp)
	vmovdqa64	%zmm4,1152(%rsp)
	vmovdqa64	%zmm5,1216(%rsp)
	vmovdqa64	%zmm6,%zmm14

	addq	$768,%r11
	subq	$768,%r8
	cmpq	$768,%r8
	jae	.L_encrypt_big_nblocks_821

.L_no_more_big_nblocks_821:

	cmpq	$512,%r8
	jae	.L_encrypt_32_blocks_821

	cmpq	$256,%r8
	jae	.L_encrypt_16_blocks_821
.L_encrypt_0_blocks_ghash_32_821:
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$256,%ebx
	subl	%r10d,%ebx
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	addl	$256,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_830

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_830
	jb	.L_last_num_blocks_is_7_1_830


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_830
	jb	.L_last_num_blocks_is_11_9_830


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_830
	ja	.L_last_num_blocks_is_16_830
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_830
	jmp	.L_last_num_blocks_is_13_830

.L_last_num_blocks_is_11_9_830:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_830
	ja	.L_last_num_blocks_is_11_830
	jmp	.L_last_num_blocks_is_9_830

.L_last_num_blocks_is_7_1_830:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_830
	jb	.L_last_num_blocks_is_3_1_830

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_830
	je	.L_last_num_blocks_is_6_830
	jmp	.L_last_num_blocks_is_5_830

.L_last_num_blocks_is_3_1_830:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_830
	je	.L_last_num_blocks_is_2_830
.L_last_num_blocks_is_1_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_831
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_831

.L_16_blocks_overflow_831:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_831:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_832





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_832
.L_small_initial_partial_block_832:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_832
.L_small_initial_compute_done_832:
.L_after_reduction_832:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_2_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_833
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_833

.L_16_blocks_overflow_833:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_833:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_834





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_834
.L_small_initial_partial_block_834:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_834:

	orq	%r8,%r8
	je	.L_after_reduction_834
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_834:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_3_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_835
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_835

.L_16_blocks_overflow_835:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_835:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_836





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_836
.L_small_initial_partial_block_836:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_836:

	orq	%r8,%r8
	je	.L_after_reduction_836
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_836:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_4_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_837
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_837

.L_16_blocks_overflow_837:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_837:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_838





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_838
.L_small_initial_partial_block_838:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_838:

	orq	%r8,%r8
	je	.L_after_reduction_838
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_838:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_5_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_839
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_839

.L_16_blocks_overflow_839:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_839:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_840





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_840
.L_small_initial_partial_block_840:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_840:

	orq	%r8,%r8
	je	.L_after_reduction_840
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_840:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_6_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_841
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_841

.L_16_blocks_overflow_841:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_841:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_842





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_842
.L_small_initial_partial_block_842:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_842:

	orq	%r8,%r8
	je	.L_after_reduction_842
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_842:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_7_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_843
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_843

.L_16_blocks_overflow_843:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_843:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_844





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_844
.L_small_initial_partial_block_844:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_844:

	orq	%r8,%r8
	je	.L_after_reduction_844
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_844:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_8_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_845
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_845

.L_16_blocks_overflow_845:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_845:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_846





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_846
.L_small_initial_partial_block_846:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_846:

	orq	%r8,%r8
	je	.L_after_reduction_846
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_846:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_9_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_847
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_847

.L_16_blocks_overflow_847:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_847:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_848





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_848
.L_small_initial_partial_block_848:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_848:

	orq	%r8,%r8
	je	.L_after_reduction_848
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_848:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_10_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_849
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_849

.L_16_blocks_overflow_849:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_849:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_850





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_850
.L_small_initial_partial_block_850:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_850:

	orq	%r8,%r8
	je	.L_after_reduction_850
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_850:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_11_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_851
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_851

.L_16_blocks_overflow_851:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_851:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_852





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_852
.L_small_initial_partial_block_852:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_852:

	orq	%r8,%r8
	je	.L_after_reduction_852
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_852:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_12_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_853
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_853

.L_16_blocks_overflow_853:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_853:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_854





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_854
.L_small_initial_partial_block_854:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_854:

	orq	%r8,%r8
	je	.L_after_reduction_854
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_854:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_13_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_855
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_855

.L_16_blocks_overflow_855:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_855:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_856





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_856
.L_small_initial_partial_block_856:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_856:

	orq	%r8,%r8
	je	.L_after_reduction_856
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_856:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_14_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_857
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_857

.L_16_blocks_overflow_857:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_857:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_858





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_858
.L_small_initial_partial_block_858:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_858:

	orq	%r8,%r8
	je	.L_after_reduction_858
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_858:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_15_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_859
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_859

.L_16_blocks_overflow_859:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_859:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_860





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_860
.L_small_initial_partial_block_860:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_860:

	orq	%r8,%r8
	je	.L_after_reduction_860
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_860:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_16_830:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_861
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_861

.L_16_blocks_overflow_861:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_861:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_862:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_862:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_862:
	jmp	.L_last_blocks_done_830
.L_last_num_blocks_is_0_830:
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_830:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_821
.L_encrypt_32_blocks_821:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_863
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_863
.L_16_blocks_overflow_863:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_863:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_864
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_864
.L_16_blocks_overflow_864:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_864:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

	subq	$512,%r8
	addq	$512,%r11
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_865

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_865
	jb	.L_last_num_blocks_is_7_1_865


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_865
	jb	.L_last_num_blocks_is_11_9_865


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_865
	ja	.L_last_num_blocks_is_16_865
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_865
	jmp	.L_last_num_blocks_is_13_865

.L_last_num_blocks_is_11_9_865:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_865
	ja	.L_last_num_blocks_is_11_865
	jmp	.L_last_num_blocks_is_9_865

.L_last_num_blocks_is_7_1_865:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_865
	jb	.L_last_num_blocks_is_3_1_865

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_865
	je	.L_last_num_blocks_is_6_865
	jmp	.L_last_num_blocks_is_5_865

.L_last_num_blocks_is_3_1_865:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_865
	je	.L_last_num_blocks_is_2_865
.L_last_num_blocks_is_1_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_866
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_866

.L_16_blocks_overflow_866:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_866:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_867





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_867
.L_small_initial_partial_block_867:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_867
.L_small_initial_compute_done_867:
.L_after_reduction_867:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_2_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_868
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_868

.L_16_blocks_overflow_868:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_868:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_869





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_869
.L_small_initial_partial_block_869:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_869:

	orq	%r8,%r8
	je	.L_after_reduction_869
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_869:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_3_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_870
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_870

.L_16_blocks_overflow_870:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_870:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_871





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_871
.L_small_initial_partial_block_871:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_871:

	orq	%r8,%r8
	je	.L_after_reduction_871
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_871:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_4_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_872
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_872

.L_16_blocks_overflow_872:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_872:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_873





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_873
.L_small_initial_partial_block_873:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_873:

	orq	%r8,%r8
	je	.L_after_reduction_873
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_873:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_5_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_874
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_874

.L_16_blocks_overflow_874:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_874:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_875





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_875
.L_small_initial_partial_block_875:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_875:

	orq	%r8,%r8
	je	.L_after_reduction_875
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_875:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_6_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_876
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_876

.L_16_blocks_overflow_876:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_876:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_877





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_877
.L_small_initial_partial_block_877:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_877:

	orq	%r8,%r8
	je	.L_after_reduction_877
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_877:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_7_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_878
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_878

.L_16_blocks_overflow_878:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_878:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_879





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_879
.L_small_initial_partial_block_879:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_879:

	orq	%r8,%r8
	je	.L_after_reduction_879
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_879:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_8_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_880
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_880

.L_16_blocks_overflow_880:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_880:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_881





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_881
.L_small_initial_partial_block_881:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_881:

	orq	%r8,%r8
	je	.L_after_reduction_881
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_881:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_9_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_882
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_882

.L_16_blocks_overflow_882:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_882:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_883





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_883
.L_small_initial_partial_block_883:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_883:

	orq	%r8,%r8
	je	.L_after_reduction_883
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_883:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_10_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_884
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_884

.L_16_blocks_overflow_884:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_884:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_885





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_885
.L_small_initial_partial_block_885:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_885:

	orq	%r8,%r8
	je	.L_after_reduction_885
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_885:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_11_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_886
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_886

.L_16_blocks_overflow_886:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_886:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_887





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_887
.L_small_initial_partial_block_887:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_887:

	orq	%r8,%r8
	je	.L_after_reduction_887
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_887:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_12_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_888
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_888

.L_16_blocks_overflow_888:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_888:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_889





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_889
.L_small_initial_partial_block_889:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_889:

	orq	%r8,%r8
	je	.L_after_reduction_889
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_889:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_13_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_890
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_890

.L_16_blocks_overflow_890:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_890:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_891





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_891
.L_small_initial_partial_block_891:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_891:

	orq	%r8,%r8
	je	.L_after_reduction_891
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_891:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_14_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_892
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_892

.L_16_blocks_overflow_892:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_892:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_893





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_893
.L_small_initial_partial_block_893:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_893:

	orq	%r8,%r8
	je	.L_after_reduction_893
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_893:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_15_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_894
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_894

.L_16_blocks_overflow_894:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_894:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_895





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_895
.L_small_initial_partial_block_895:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_895:

	orq	%r8,%r8
	je	.L_after_reduction_895
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_895:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_16_865:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_896
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_896

.L_16_blocks_overflow_896:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_896:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_897:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_897:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_897:
	jmp	.L_last_blocks_done_865
.L_last_num_blocks_is_0_865:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_865:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_821
.L_encrypt_16_blocks_821:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_898
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_898
.L_16_blocks_overflow_898:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_898:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31

	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm17,%zmm0
	vpshufb	%zmm29,%zmm19,%zmm3
	vpshufb	%zmm29,%zmm20,%zmm4
	vpshufb	%zmm29,%zmm21,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	256(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	320(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	384(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	448(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_899

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_899
	jb	.L_last_num_blocks_is_7_1_899


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_899
	jb	.L_last_num_blocks_is_11_9_899


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_899
	ja	.L_last_num_blocks_is_16_899
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_899
	jmp	.L_last_num_blocks_is_13_899

.L_last_num_blocks_is_11_9_899:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_899
	ja	.L_last_num_blocks_is_11_899
	jmp	.L_last_num_blocks_is_9_899

.L_last_num_blocks_is_7_1_899:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_899
	jb	.L_last_num_blocks_is_3_1_899

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_899
	je	.L_last_num_blocks_is_6_899
	jmp	.L_last_num_blocks_is_5_899

.L_last_num_blocks_is_3_1_899:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_899
	je	.L_last_num_blocks_is_2_899
.L_last_num_blocks_is_1_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_900
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_900

.L_16_blocks_overflow_900:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_900:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%xmm31,%xmm0,%xmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_901





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_901
.L_small_initial_partial_block_901:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)











	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_901
.L_small_initial_compute_done_901:
.L_after_reduction_901:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_2_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_902
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_902

.L_16_blocks_overflow_902:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_902:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%ymm31,%ymm0,%ymm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_903





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_903
.L_small_initial_partial_block_903:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_903:

	orq	%r8,%r8
	je	.L_after_reduction_903
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_903:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_3_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_904
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_904

.L_16_blocks_overflow_904:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_904:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_905





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_905
.L_small_initial_partial_block_905:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_905:

	orq	%r8,%r8
	je	.L_after_reduction_905
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_905:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_4_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_906
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_906

.L_16_blocks_overflow_906:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_906:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_907





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_907
.L_small_initial_partial_block_907:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_907:

	orq	%r8,%r8
	je	.L_after_reduction_907
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_907:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_5_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_908
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_908

.L_16_blocks_overflow_908:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_908:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_909





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_909
.L_small_initial_partial_block_909:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_909:

	orq	%r8,%r8
	je	.L_after_reduction_909
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_909:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_6_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_910
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_910

.L_16_blocks_overflow_910:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_910:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_911





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_911
.L_small_initial_partial_block_911:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_911:

	orq	%r8,%r8
	je	.L_after_reduction_911
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_911:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_7_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_912
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_912

.L_16_blocks_overflow_912:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_912:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_913





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_913
.L_small_initial_partial_block_913:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_913:

	orq	%r8,%r8
	je	.L_after_reduction_913
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_913:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_8_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_914
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_914

.L_16_blocks_overflow_914:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_914:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_915





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_915
.L_small_initial_partial_block_915:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_915:

	orq	%r8,%r8
	je	.L_after_reduction_915
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_915:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_9_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_916
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_916

.L_16_blocks_overflow_916:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_916:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_917





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_917
.L_small_initial_partial_block_917:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_917:

	orq	%r8,%r8
	je	.L_after_reduction_917
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_917:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_10_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_918
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_918

.L_16_blocks_overflow_918:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_918:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_919





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_919
.L_small_initial_partial_block_919:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_919:

	orq	%r8,%r8
	je	.L_after_reduction_919
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_919:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_11_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_920
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_920

.L_16_blocks_overflow_920:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_920:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_921





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_921
.L_small_initial_partial_block_921:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_921:

	orq	%r8,%r8
	je	.L_after_reduction_921
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_921:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_12_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_922
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_922

.L_16_blocks_overflow_922:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_922:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_923





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_923
.L_small_initial_partial_block_923:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_923:

	orq	%r8,%r8
	je	.L_after_reduction_923
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_923:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_13_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_924
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_924

.L_16_blocks_overflow_924:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_924:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_925





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_925
.L_small_initial_partial_block_925:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_925:

	orq	%r8,%r8
	je	.L_after_reduction_925
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_925:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_14_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_926
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_926

.L_16_blocks_overflow_926:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_926:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_927





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_927
.L_small_initial_partial_block_927:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_927:

	orq	%r8,%r8
	je	.L_after_reduction_927
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_927:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_15_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_928
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_928

.L_16_blocks_overflow_928:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_928:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_929





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_929
.L_small_initial_partial_block_929:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_929:

	orq	%r8,%r8
	je	.L_after_reduction_929
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_929:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_16_899:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_930
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_930

.L_16_blocks_overflow_930:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_930:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_931:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_931:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_931:
	jmp	.L_last_blocks_done_899
.L_last_num_blocks_is_0_899:
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_899:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_821

.L_message_below_32_blocks_821:


	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_932
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)
.L_skip_hkeys_precomputation_932:
	movq	$1,%r14
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_933

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_933
	jb	.L_last_num_blocks_is_7_1_933


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_933
	jb	.L_last_num_blocks_is_11_9_933


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_933
	ja	.L_last_num_blocks_is_16_933
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_933
	jmp	.L_last_num_blocks_is_13_933

.L_last_num_blocks_is_11_9_933:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_933
	ja	.L_last_num_blocks_is_11_933
	jmp	.L_last_num_blocks_is_9_933

.L_last_num_blocks_is_7_1_933:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_933
	jb	.L_last_num_blocks_is_3_1_933

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_933
	je	.L_last_num_blocks_is_6_933
	jmp	.L_last_num_blocks_is_5_933

.L_last_num_blocks_is_3_1_933:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_933
	je	.L_last_num_blocks_is_2_933
.L_last_num_blocks_is_1_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_934
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_934

.L_16_blocks_overflow_934:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_934:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%xmm29,%xmm17,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_935





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_935
.L_small_initial_partial_block_935:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_935
.L_small_initial_compute_done_935:
.L_after_reduction_935:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_2_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_936
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_936

.L_16_blocks_overflow_936:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_936:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_937





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_937
.L_small_initial_partial_block_937:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_937:

	orq	%r8,%r8
	je	.L_after_reduction_937
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_937:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_3_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_938
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_938

.L_16_blocks_overflow_938:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_938:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_939





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_939
.L_small_initial_partial_block_939:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_939:

	orq	%r8,%r8
	je	.L_after_reduction_939
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_939:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_4_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_940
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_940

.L_16_blocks_overflow_940:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_940:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_941





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_941
.L_small_initial_partial_block_941:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_941:

	orq	%r8,%r8
	je	.L_after_reduction_941
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_941:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_5_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_942
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_942

.L_16_blocks_overflow_942:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_942:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%xmm29,%xmm19,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_943





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_943
.L_small_initial_partial_block_943:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_943:

	orq	%r8,%r8
	je	.L_after_reduction_943
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_943:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_6_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_944
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_944

.L_16_blocks_overflow_944:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_944:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%ymm29,%ymm19,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_945





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_945
.L_small_initial_partial_block_945:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_945:

	orq	%r8,%r8
	je	.L_after_reduction_945
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_945:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_7_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_946
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_946

.L_16_blocks_overflow_946:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_946:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_947





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_947
.L_small_initial_partial_block_947:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_947:

	orq	%r8,%r8
	je	.L_after_reduction_947
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_947:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_8_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_948
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_948

.L_16_blocks_overflow_948:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_948:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm19,%zmm19{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_949





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_949
.L_small_initial_partial_block_949:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_949:

	orq	%r8,%r8
	je	.L_after_reduction_949
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_949:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_9_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_950
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_950

.L_16_blocks_overflow_950:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_950:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%xmm29,%xmm20,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_951





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_951
.L_small_initial_partial_block_951:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_951:

	orq	%r8,%r8
	je	.L_after_reduction_951
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_951:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_10_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_952
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_952

.L_16_blocks_overflow_952:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_952:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%ymm29,%ymm20,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_953





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_953
.L_small_initial_partial_block_953:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_953:

	orq	%r8,%r8
	je	.L_after_reduction_953
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_953:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_11_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_954
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_954

.L_16_blocks_overflow_954:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_954:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_955





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_955
.L_small_initial_partial_block_955:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_955:

	orq	%r8,%r8
	je	.L_after_reduction_955
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_955:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_12_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_956
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_956

.L_16_blocks_overflow_956:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_956:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm20,%zmm20{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_957





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_957
.L_small_initial_partial_block_957:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_957:

	orq	%r8,%r8
	je	.L_after_reduction_957
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_957:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_13_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_958
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_958

.L_16_blocks_overflow_958:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_958:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%xmm29,%xmm21,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_959





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_959
.L_small_initial_partial_block_959:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_959:

	orq	%r8,%r8
	je	.L_after_reduction_959
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_959:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_14_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_960
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_960

.L_16_blocks_overflow_960:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_960:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%ymm29,%ymm21,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_961





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_961
.L_small_initial_partial_block_961:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_961:

	orq	%r8,%r8
	je	.L_after_reduction_961
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_961:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_15_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_962
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_962

.L_16_blocks_overflow_962:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_962:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_963





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_963
.L_small_initial_partial_block_963:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_963:

	orq	%r8,%r8
	je	.L_after_reduction_963
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_963:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_16_933:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_964
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_964

.L_16_blocks_overflow_964:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_964:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm21,%zmm21{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vpshufb	%zmm29,%zmm19,%zmm19
	vpshufb	%zmm29,%zmm20,%zmm20
	vpshufb	%zmm29,%zmm21,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_965:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_965:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_965:
	jmp	.L_last_blocks_done_933
.L_last_num_blocks_is_0_933:
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_933:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_821

.L_message_below_equal_16_blocks_821:


	movl	%r8d,%r12d
	addl	$15,%r12d
	shrl	$4,%r12d
	cmpq	$8,%r12
	je	.L_small_initial_num_blocks_is_8_966
	jl	.L_small_initial_num_blocks_is_7_1_966


	cmpq	$12,%r12
	je	.L_small_initial_num_blocks_is_12_966
	jl	.L_small_initial_num_blocks_is_11_9_966


	cmpq	$16,%r12
	je	.L_small_initial_num_blocks_is_16_966
	cmpq	$15,%r12
	je	.L_small_initial_num_blocks_is_15_966
	cmpq	$14,%r12
	je	.L_small_initial_num_blocks_is_14_966
	jmp	.L_small_initial_num_blocks_is_13_966

.L_small_initial_num_blocks_is_11_9_966:

	cmpq	$11,%r12
	je	.L_small_initial_num_blocks_is_11_966
	cmpq	$10,%r12
	je	.L_small_initial_num_blocks_is_10_966
	jmp	.L_small_initial_num_blocks_is_9_966

.L_small_initial_num_blocks_is_7_1_966:
	cmpq	$4,%r12
	je	.L_small_initial_num_blocks_is_4_966
	jl	.L_small_initial_num_blocks_is_3_1_966

	cmpq	$7,%r12
	je	.L_small_initial_num_blocks_is_7_966
	cmpq	$6,%r12
	je	.L_small_initial_num_blocks_is_6_966
	jmp	.L_small_initial_num_blocks_is_5_966

.L_small_initial_num_blocks_is_3_1_966:

	cmpq	$3,%r12
	je	.L_small_initial_num_blocks_is_3_966
	cmpq	$2,%r12
	je	.L_small_initial_num_blocks_is_2_966





.L_small_initial_num_blocks_is_1_966:
	vmovdqa64	SHUF_MASK(%rip),%xmm29
	vpaddd	ONE(%rip),%xmm2,%xmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm0,%xmm2
	vpshufb	%xmm29,%xmm0,%xmm0
	vmovdqu8	0(%rcx,%r11,1),%xmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%xmm15,%xmm0,%xmm0
	vpxorq	%xmm6,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm6,%xmm6
	vextracti32x4	$0,%zmm6,%xmm13


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_967





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_967
.L_small_initial_partial_block_967:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)











	vpxorq	%xmm13,%xmm14,%xmm14

	jmp	.L_after_reduction_967
.L_small_initial_compute_done_967:
.L_after_reduction_967:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_2_966:
	vmovdqa64	SHUF_MASK(%rip),%ymm29
	vshufi64x2	$0,%ymm2,%ymm2,%ymm0
	vpaddd	ddq_add_1234(%rip),%ymm0,%ymm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm0,%xmm2
	vpshufb	%ymm29,%ymm0,%ymm0
	vmovdqu8	0(%rcx,%r11,1),%ymm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%ymm15,%ymm0,%ymm0
	vpxorq	%ymm6,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm6,%ymm6
	vextracti32x4	$1,%zmm6,%xmm13
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_968





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_968
.L_small_initial_partial_block_968:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_968:

	orq	%r8,%r8
	je	.L_after_reduction_968
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_968:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_3_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vextracti32x4	$2,%zmm6,%xmm13
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_969





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_969
.L_small_initial_partial_block_969:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_969:

	orq	%r8,%r8
	je	.L_after_reduction_969
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_969:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_4_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vextracti32x4	$3,%zmm6,%xmm13
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_970





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_970
.L_small_initial_partial_block_970:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_970:

	orq	%r8,%r8
	je	.L_after_reduction_970
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_970:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_5_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%xmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%xmm15,%xmm3,%xmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%xmm7,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%xmm29,%xmm7,%xmm7
	vextracti32x4	$0,%zmm7,%xmm13
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_971





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_971
.L_small_initial_partial_block_971:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_971:

	orq	%r8,%r8
	je	.L_after_reduction_971
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_971:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_6_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%ymm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%ymm15,%ymm3,%ymm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%ymm7,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%ymm29,%ymm7,%ymm7
	vextracti32x4	$1,%zmm7,%xmm13
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_972





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_972
.L_small_initial_partial_block_972:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_972:

	orq	%r8,%r8
	je	.L_after_reduction_972
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_972:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_7_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vextracti32x4	$2,%zmm7,%xmm13
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_973





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_973
.L_small_initial_partial_block_973:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_973:

	orq	%r8,%r8
	je	.L_after_reduction_973
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_973:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_8_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vextracti32x4	$3,%zmm7,%xmm13
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_974





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_974
.L_small_initial_partial_block_974:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_974:

	orq	%r8,%r8
	je	.L_after_reduction_974
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_974:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_9_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%xmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%xmm15,%xmm4,%xmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%xmm10,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%xmm29,%xmm10,%xmm10
	vextracti32x4	$0,%zmm10,%xmm13
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_975





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_975
.L_small_initial_partial_block_975:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_975:

	orq	%r8,%r8
	je	.L_after_reduction_975
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_975:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_10_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%ymm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%ymm15,%ymm4,%ymm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%ymm10,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%ymm29,%ymm10,%ymm10
	vextracti32x4	$1,%zmm10,%xmm13
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_976





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_976
.L_small_initial_partial_block_976:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_976:

	orq	%r8,%r8
	je	.L_after_reduction_976
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_976:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_11_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vextracti32x4	$2,%zmm10,%xmm13
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_977





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_977
.L_small_initial_partial_block_977:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_977:

	orq	%r8,%r8
	je	.L_after_reduction_977
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_977:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_12_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vextracti32x4	$3,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vextracti32x4	$3,%zmm10,%xmm13
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_978





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_978
.L_small_initial_partial_block_978:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_978:

	orq	%r8,%r8
	je	.L_after_reduction_978
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_978:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_13_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%xmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%xmm15,%xmm5,%xmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%xmm11,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%xmm29,%xmm11,%xmm11
	vextracti32x4	$0,%zmm11,%xmm13
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_979





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_979
.L_small_initial_partial_block_979:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_979:

	orq	%r8,%r8
	je	.L_after_reduction_979
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_979:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_14_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$1,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%ymm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%ymm15,%ymm5,%ymm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%ymm11,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%ymm29,%ymm11,%ymm11
	vextracti32x4	$1,%zmm11,%xmm13
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_980





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_980
.L_small_initial_partial_block_980:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_980:

	orq	%r8,%r8
	je	.L_after_reduction_980
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_980:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_15_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$2,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vextracti32x4	$2,%zmm11,%xmm13
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_981





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_981
.L_small_initial_partial_block_981:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_981:

	orq	%r8,%r8
	je	.L_after_reduction_981
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_981:
	jmp	.L_small_initial_blocks_encrypted_966
.L_small_initial_num_blocks_is_16_966:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$3,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	208(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	224(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm6,%zmm6
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vextracti32x4	$3,%zmm11,%xmm13
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_982:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_982:
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_982:
.L_small_initial_blocks_encrypted_966:
.L_ghash_done_821:
	vmovdqu64	%xmm2,0(%rsi)
	vmovdqu64	%xmm14,64(%rsi)
.L_enc_dec_done_821:
	jmp	.Lexit_gcm_decrypt
.Lexit_gcm_decrypt:
	cmpq	$256,%r8
	jbe	.Lskip_hkeys_cleanup_983
	vpxor	%xmm0,%xmm0,%xmm0
	vmovdqa64	%zmm0,0(%rsp)
	vmovdqa64	%zmm0,64(%rsp)
	vmovdqa64	%zmm0,128(%rsp)
	vmovdqa64	%zmm0,192(%rsp)
	vmovdqa64	%zmm0,256(%rsp)
	vmovdqa64	%zmm0,320(%rsp)
	vmovdqa64	%zmm0,384(%rsp)
	vmovdqa64	%zmm0,448(%rsp)
	vmovdqa64	%zmm0,512(%rsp)
	vmovdqa64	%zmm0,576(%rsp)
	vmovdqa64	%zmm0,640(%rsp)
	vmovdqa64	%zmm0,704(%rsp)
.Lskip_hkeys_cleanup_983:
	vzeroupper
	leaq	(%rbp),%rsp
.cfi_def_cfa_register	%rsp
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
	.byte	0xf3,0xc3
.Ldecrypt_seh_end:
.cfi_endproc	
.size	ossl_aes_gcm_decrypt_avx512, .-ossl_aes_gcm_decrypt_avx512
.globl	ossl_aes_gcm_finalize_avx512
.type	ossl_aes_gcm_finalize_avx512,@function
.align	32
ossl_aes_gcm_finalize_avx512:
.cfi_startproc	
.byte	243,15,30,250
	vmovdqu	336(%rdi),%xmm2
	vmovdqu	32(%rdi),%xmm3
	vmovdqu	64(%rdi),%xmm4


	cmpq	$0,%rsi
	je	.L_partial_done_984

	vpclmulqdq	$0x11,%xmm2,%xmm4,%xmm0
	vpclmulqdq	$0x00,%xmm2,%xmm4,%xmm16
	vpclmulqdq	$0x01,%xmm2,%xmm4,%xmm17
	vpclmulqdq	$0x10,%xmm2,%xmm4,%xmm4
	vpxorq	%xmm17,%xmm4,%xmm4

	vpsrldq	$8,%xmm4,%xmm17
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm17,%xmm0,%xmm0
	vpxorq	%xmm16,%xmm4,%xmm4



	vmovdqu64	POLY2(%rip),%xmm17

	vpclmulqdq	$0x01,%xmm4,%xmm17,%xmm16
	vpslldq	$8,%xmm16,%xmm16
	vpxorq	%xmm16,%xmm4,%xmm4



	vpclmulqdq	$0x00,%xmm4,%xmm17,%xmm16
	vpsrldq	$4,%xmm16,%xmm16
	vpclmulqdq	$0x10,%xmm4,%xmm17,%xmm4
	vpslldq	$4,%xmm4,%xmm4

	vpternlogq	$0x96,%xmm16,%xmm0,%xmm4

.L_partial_done_984:
	vmovq	56(%rdi),%xmm5
	vpinsrq	$1,48(%rdi),%xmm5,%xmm5
	vpsllq	$3,%xmm5,%xmm5

	vpxor	%xmm5,%xmm4,%xmm4

	vpclmulqdq	$0x11,%xmm2,%xmm4,%xmm0
	vpclmulqdq	$0x00,%xmm2,%xmm4,%xmm16
	vpclmulqdq	$0x01,%xmm2,%xmm4,%xmm17
	vpclmulqdq	$0x10,%xmm2,%xmm4,%xmm4
	vpxorq	%xmm17,%xmm4,%xmm4

	vpsrldq	$8,%xmm4,%xmm17
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm17,%xmm0,%xmm0
	vpxorq	%xmm16,%xmm4,%xmm4



	vmovdqu64	POLY2(%rip),%xmm17

	vpclmulqdq	$0x01,%xmm4,%xmm17,%xmm16
	vpslldq	$8,%xmm16,%xmm16
	vpxorq	%xmm16,%xmm4,%xmm4



	vpclmulqdq	$0x00,%xmm4,%xmm17,%xmm16
	vpsrldq	$4,%xmm16,%xmm16
	vpclmulqdq	$0x10,%xmm4,%xmm17,%xmm4
	vpslldq	$4,%xmm4,%xmm4

	vpternlogq	$0x96,%xmm16,%xmm0,%xmm4

	vpshufb	SHUF_MASK(%rip),%xmm4,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

.L_return_T_984:
	vmovdqu	%xmm3,64(%rdi)
.Labort_finalize:
	.byte	0xf3,0xc3
.cfi_endproc	
.size	ossl_aes_gcm_finalize_avx512, .-ossl_aes_gcm_finalize_avx512
.globl	ossl_gcm_gmult_avx512
.hidden	ossl_gcm_gmult_avx512
.type	ossl_gcm_gmult_avx512,@function
.align	32
ossl_gcm_gmult_avx512:
.cfi_startproc	
.byte	243,15,30,250
	vmovdqu64	(%rdi),%xmm1
	vmovdqu64	336(%rsi),%xmm2

	vpclmulqdq	$0x11,%xmm2,%xmm1,%xmm3
	vpclmulqdq	$0x00,%xmm2,%xmm1,%xmm4
	vpclmulqdq	$0x01,%xmm2,%xmm1,%xmm5
	vpclmulqdq	$0x10,%xmm2,%xmm1,%xmm1
	vpxorq	%xmm5,%xmm1,%xmm1

	vpsrldq	$8,%xmm1,%xmm5
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm4,%xmm1,%xmm1



	vmovdqu64	POLY2(%rip),%xmm5

	vpclmulqdq	$0x01,%xmm1,%xmm5,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm1,%xmm1



	vpclmulqdq	$0x00,%xmm1,%xmm5,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm5,%xmm1
	vpslldq	$4,%xmm1,%xmm1

	vpternlogq	$0x96,%xmm4,%xmm3,%xmm1

	vmovdqu64	%xmm1,(%rdi)
	vzeroupper
.Labort_gmult:
	.byte	0xf3,0xc3
.cfi_endproc	
.size	ossl_gcm_gmult_avx512, .-ossl_gcm_gmult_avx512
.section	.rodata
.align	16
POLY:.quad	0x0000000000000001, 0xC200000000000000

.align	64
POLY2:
.quad	0x00000001C2000000, 0xC200000000000000
.quad	0x00000001C2000000, 0xC200000000000000
.quad	0x00000001C2000000, 0xC200000000000000
.quad	0x00000001C2000000, 0xC200000000000000

.align	16
TWOONE:.quad	0x0000000000000001, 0x0000000100000000



.align	64
SHUF_MASK:
.quad	0x08090A0B0C0D0E0F, 0x0001020304050607
.quad	0x08090A0B0C0D0E0F, 0x0001020304050607
.quad	0x08090A0B0C0D0E0F, 0x0001020304050607
.quad	0x08090A0B0C0D0E0F, 0x0001020304050607

.align	16
SHIFT_MASK:
.quad	0x0706050403020100, 0x0f0e0d0c0b0a0908

ALL_F:
.quad	0xffffffffffffffff, 0xffffffffffffffff

ZERO:
.quad	0x0000000000000000, 0x0000000000000000

.align	16
ONE:
.quad	0x0000000000000001, 0x0000000000000000

.align	16
ONEf:
.quad	0x0000000000000000, 0x0100000000000000

.align	64
ddq_add_1234:
.quad	0x0000000000000001, 0x0000000000000000
.quad	0x0000000000000002, 0x0000000000000000
.quad	0x0000000000000003, 0x0000000000000000
.quad	0x0000000000000004, 0x0000000000000000

.align	64
ddq_add_5678:
.quad	0x0000000000000005, 0x0000000000000000
.quad	0x0000000000000006, 0x0000000000000000
.quad	0x0000000000000007, 0x0000000000000000
.quad	0x0000000000000008, 0x0000000000000000

.align	64
ddq_add_4444:
.quad	0x0000000000000004, 0x0000000000000000
.quad	0x0000000000000004, 0x0000000000000000
.quad	0x0000000000000004, 0x0000000000000000
.quad	0x0000000000000004, 0x0000000000000000

.align	64
ddq_add_8888:
.quad	0x0000000000000008, 0x0000000000000000
.quad	0x0000000000000008, 0x0000000000000000
.quad	0x0000000000000008, 0x0000000000000000
.quad	0x0000000000000008, 0x0000000000000000

.align	64
ddq_addbe_1234:
.quad	0x0000000000000000, 0x0100000000000000
.quad	0x0000000000000000, 0x0200000000000000
.quad	0x0000000000000000, 0x0300000000000000
.quad	0x0000000000000000, 0x0400000000000000

.align	64
ddq_addbe_4444:
.quad	0x0000000000000000, 0x0400000000000000
.quad	0x0000000000000000, 0x0400000000000000
.quad	0x0000000000000000, 0x0400000000000000
.quad	0x0000000000000000, 0x0400000000000000

.align	64
byte_len_to_mask_table:
.value	0x0000, 0x0001, 0x0003, 0x0007
.value	0x000f, 0x001f, 0x003f, 0x007f
.value	0x00ff, 0x01ff, 0x03ff, 0x07ff
.value	0x0fff, 0x1fff, 0x3fff, 0x7fff
.value	0xffff

.align	64
byte64_len_to_mask_table:
.quad	0x0000000000000000, 0x0000000000000001
.quad	0x0000000000000003, 0x0000000000000007
.quad	0x000000000000000f, 0x000000000000001f
.quad	0x000000000000003f, 0x000000000000007f
.quad	0x00000000000000ff, 0x00000000000001ff
.quad	0x00000000000003ff, 0x00000000000007ff
.quad	0x0000000000000fff, 0x0000000000001fff
.quad	0x0000000000003fff, 0x0000000000007fff
.quad	0x000000000000ffff, 0x000000000001ffff
.quad	0x000000000003ffff, 0x000000000007ffff
.quad	0x00000000000fffff, 0x00000000001fffff
.quad	0x00000000003fffff, 0x00000000007fffff
.quad	0x0000000000ffffff, 0x0000000001ffffff
.quad	0x0000000003ffffff, 0x0000000007ffffff
.quad	0x000000000fffffff, 0x000000001fffffff
.quad	0x000000003fffffff, 0x000000007fffffff
.quad	0x00000000ffffffff, 0x00000001ffffffff
.quad	0x00000003ffffffff, 0x00000007ffffffff
.quad	0x0000000fffffffff, 0x0000001fffffffff
.quad	0x0000003fffffffff, 0x0000007fffffffff
.quad	0x000000ffffffffff, 0x000001ffffffffff
.quad	0x000003ffffffffff, 0x000007ffffffffff
.quad	0x00000fffffffffff, 0x00001fffffffffff
.quad	0x00003fffffffffff, 0x00007fffffffffff
.quad	0x0000ffffffffffff, 0x0001ffffffffffff
.quad	0x0003ffffffffffff, 0x0007ffffffffffff
.quad	0x000fffffffffffff, 0x001fffffffffffff
.quad	0x003fffffffffffff, 0x007fffffffffffff
.quad	0x00ffffffffffffff, 0x01ffffffffffffff
.quad	0x03ffffffffffffff, 0x07ffffffffffffff
.quad	0x0fffffffffffffff, 0x1fffffffffffffff
.quad	0x3fffffffffffffff, 0x7fffffffffffffff
.quad	0xffffffffffffffff
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4:
