Lines 101-107
Link Here
|
101 |
compiler choice is limited to GCC and Microsoft C. */ |
101 |
compiler choice is limited to GCC and Microsoft C. */ |
102 |
#undef COMPILE_HW_PADLOCK |
102 |
#undef COMPILE_HW_PADLOCK |
103 |
#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) |
103 |
#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) |
104 |
# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ |
104 |
# if (defined(__GNUC__) && __GNUC__>=2 && \ |
|
|
105 |
(defined(__i386__) || defined(__i386) || \ |
106 |
defined(__x86_64__) || defined(__x86_64)) \ |
107 |
) || \ |
105 |
(defined(_MSC_VER) && defined(_M_IX86)) |
108 |
(defined(_MSC_VER) && defined(_M_IX86)) |
106 |
# define COMPILE_HW_PADLOCK |
109 |
# define COMPILE_HW_PADLOCK |
107 |
static ENGINE *ENGINE_padlock (void); |
110 |
static ENGINE *ENGINE_padlock (void); |
Lines 131-137
Link Here
|
131 |
# endif |
134 |
# endif |
132 |
#elif defined(__GNUC__) |
135 |
#elif defined(__GNUC__) |
133 |
# ifndef alloca |
136 |
# ifndef alloca |
134 |
# define alloca(s) __builtin_alloca(s) |
137 |
# define alloca(s) __builtin_alloca((s)) |
135 |
# endif |
138 |
# endif |
136 |
#endif |
139 |
#endif |
137 |
|
140 |
|
Lines 294-299
Link Here
|
294 |
* ======================================================= |
297 |
* ======================================================= |
295 |
*/ |
298 |
*/ |
296 |
#if defined(__GNUC__) && __GNUC__>=2 |
299 |
#if defined(__GNUC__) && __GNUC__>=2 |
|
|
300 |
#if defined(__i386__) || defined(__i386) |
297 |
/* |
301 |
/* |
298 |
* As for excessive "push %ebx"/"pop %ebx" found all over. |
302 |
* As for excessive "push %ebx"/"pop %ebx" found all over. |
299 |
* When generating position-independent code GCC won't let |
303 |
* When generating position-independent code GCC won't let |
Lines 373-393
Link Here
|
373 |
return padlock_use_ace + padlock_use_rng; |
377 |
return padlock_use_ace + padlock_use_rng; |
374 |
} |
378 |
} |
375 |
|
379 |
|
376 |
#ifndef OPENSSL_NO_AES |
|
|
377 |
/* Our own htonl()/ntohl() */ |
378 |
static inline void |
379 |
padlock_bswapl(AES_KEY *ks) |
380 |
{ |
381 |
size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); |
382 |
unsigned int *key = ks->rd_key; |
383 |
|
384 |
while (i--) { |
385 |
asm volatile ("bswapl %0" : "+r"(*key)); |
386 |
key++; |
387 |
} |
388 |
} |
389 |
#endif |
390 |
|
391 |
/* Force key reload from memory to the CPU microcode. |
380 |
/* Force key reload from memory to the CPU microcode. |
392 |
Loading EFLAGS from the stack clears EFLAGS[30] |
381 |
Loading EFLAGS from the stack clears EFLAGS[30] |
393 |
which does the trick. */ |
382 |
which does the trick. */ |
Lines 445-456
Link Here
|
445 |
: "edx", "cc", "memory"); \ |
434 |
: "edx", "cc", "memory"); \ |
446 |
return iv; \ |
435 |
return iv; \ |
447 |
} |
436 |
} |
|
|
437 |
#endif |
438 |
|
439 |
#elif defined(__x86_64__) || defined(__x86_64) |
440 |
|
441 |
/* Load supported features of the CPU to see if |
442 |
the PadLock is available. */ |
443 |
static int |
444 |
padlock_available(void) |
445 |
{ |
446 |
char vendor_string[16]; |
447 |
unsigned int eax, edx; |
448 |
|
448 |
|
|
|
449 |
/* Are we running on a Centaur (VIA) CPU? */ |
450 |
eax = 0x00000000; |
451 |
vendor_string[12] = 0; |
452 |
asm volatile ( |
453 |
"cpuid\n" |
454 |
"movl %%ebx,(%1)\n" |
455 |
"movl %%edx,4(%1)\n" |
456 |
"movl %%ecx,8(%1)\n" |
457 |
: "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx"); |
458 |
if (strcmp(vendor_string, "CentaurHauls") != 0) |
459 |
return 0; |
460 |
|
461 |
/* Check for Centaur Extended Feature Flags presence */ |
462 |
eax = 0xC0000000; |
463 |
asm volatile ("cpuid" |
464 |
: "+a"(eax) : : "rbx", "rcx", "rdx"); |
465 |
if (eax < 0xC0000001) |
466 |
return 0; |
467 |
|
468 |
/* Read the Centaur Extended Feature Flags */ |
469 |
eax = 0xC0000001; |
470 |
asm volatile ("cpuid" |
471 |
: "+a"(eax), "=d"(edx) : : "rbx", "rcx"); |
472 |
|
473 |
/* Set the ACE and RNG flags from the extended feature bits in edx */ |
474 |
padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6)); |
475 |
padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2)); |
476 |
|
477 |
return padlock_use_ace + padlock_use_rng; |
478 |
} |
479 |
|
480 |
/* Force key reload from memory to the CPU microcode. |
481 |
Loading EFLAGS from the stack clears EFLAGS[30] |
482 |
which does the trick. */ |
483 |
static inline void |
484 |
padlock_reload_key(void) |
485 |
{ |
486 |
asm volatile ("pushfq; popfq"); |
487 |
} |
488 |
|
489 |
#ifndef OPENSSL_NO_AES |
490 |
/* |
491 |
* This is heuristic key context tracing. At first one |
492 |
* believes that one should use atomic swap instructions, |
493 |
* but it's not actually necessary. Point is that if |
494 |
* padlock_saved_context was changed by another thread |
495 |
* after we've read it and before we compare it with cdata, |
496 |
* our key *shall* be reloaded upon thread context switch |
497 |
* and we are therefore set in either case... |
498 |
*/ |
499 |
static inline void |
500 |
padlock_verify_context(struct padlock_cipher_data *cdata) |
501 |
{ |
502 |
asm volatile ( |
503 |
"pushfq\n" |
504 |
" btl $30,(%%rsp)\n" |
505 |
" jnc 1f\n" |
506 |
" cmpq %2,%1\n" |
507 |
" je 1f\n" |
508 |
" popfq\n" |
509 |
" subq $8,%%rsp\n" |
510 |
"1: addq $8,%%rsp\n" |
511 |
" movq %2,%0" |
512 |
:"+m"(padlock_saved_context) |
513 |
: "r"(padlock_saved_context), "r"(cdata) : "cc"); |
514 |
} |
515 |
|
516 |
/* Template for padlock_xcrypt_* modes */ |
517 |
/* BIG FAT WARNING: |
518 |
* The offsets used with 'leaq' instructions |
519 |
* describe items of the 'padlock_cipher_data' |
520 |
* structure. |
521 |
*/ |
522 |
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ |
523 |
static inline void *name(size_t cnt, \ |
524 |
struct padlock_cipher_data *cdata, \ |
525 |
void *out, const void *inp) \ |
526 |
{ void *iv; \ |
527 |
asm volatile ( "leaq 16(%0),%%rdx\n" \ |
528 |
" leaq 32(%0),%%rbx\n" \ |
529 |
rep_xcrypt "\n" \ |
530 |
: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ |
531 |
: "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ |
532 |
: "rbx", "rdx", "cc", "memory"); \ |
533 |
return iv; \ |
534 |
} |
535 |
#endif |
536 |
|
537 |
#endif /* cpu */ |
538 |
|
539 |
#ifndef OPENSSL_NO_AES |
449 |
/* Generate all functions with appropriate opcodes */ |
540 |
/* Generate all functions with appropriate opcodes */ |
450 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */ |
541 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */ |
451 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */ |
542 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */ |
452 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */ |
543 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */ |
453 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */ |
544 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */ |
|
|
545 |
|
546 |
/* Our own htonl()/ntohl() */ |
547 |
static inline void |
548 |
padlock_bswapl(AES_KEY *ks) |
549 |
{ |
550 |
size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); |
551 |
unsigned int *key = ks->rd_key; |
552 |
|
553 |
while (i--) { |
554 |
asm volatile ("bswapl %0" : "+r"(*key)); |
555 |
key++; |
556 |
} |
557 |
} |
454 |
#endif |
558 |
#endif |
455 |
|
559 |
|
456 |
/* The RNG call itself */ |
560 |
/* The RNG call itself */ |
Lines 481-488
Link Here
|
481 |
static inline unsigned char * |
585 |
static inline unsigned char * |
482 |
padlock_memcpy(void *dst,const void *src,size_t n) |
586 |
padlock_memcpy(void *dst,const void *src,size_t n) |
483 |
{ |
587 |
{ |
484 |
long *d=dst; |
588 |
size_t *d=dst; |
485 |
const long *s=src; |
589 |
const size_t *s=src; |
486 |
|
590 |
|
487 |
n /= sizeof(*d); |
591 |
n /= sizeof(*d); |
488 |
do { *d++ = *s++; } while (--n); |
592 |
do { *d++ = *s++; } while (--n); |