본문 바로가기
알고리즘/[EXP] 삼성 SW 역량 테스트 C형

비트 압축 - 37.5% 압축하기 2 (16bit : 10bit ~)

by 피로물든딸기 2023. 7. 30.
반응형

삼성 C형 전체 링크

 

참고

- 33% 압축하기

- 37.5% 압축하기 1 (8bit : 5bit)

- 37.5% 압축하기 2 (16bit : 10bit ~)

- 허프만 알고리즘

 

8비트 압축과 같은 방식으로 short 16bit10bit, int 32bit20bit, long 64bit40bit도 해보자.

이 경우 short에 들어가있는 16bit는 210 = 1024 미만의 수로만 구성되어 있다고 볼 수 있다.


short 16 to 10

 

short를 그림으로 그려보면 다음과 같다.

 

char의 그림을 2배로 늘린 것과 같다.

 

char 압축 코드는 아래와 같았다.

void char8to5(unsigned char src[8], unsigned char dest[5])
{
	printf("src 8 >> dest 5\n");

	for (int i = 0; i < 8; i++) src[i] -= 'a';
	for (int i = 0; i < 8; i++) printBitNumber(src[i]);

	dest[0] = src[0] << 3;
	dest[0] |= ((src[1] >> 2));
	
	dest[1] = (src[1] << 6);
	dest[1] |= (src[2] << 1);
	dest[1] |= (src[3] >> 4);

	dest[2] = (src[3] << 4);
	dest[2] |= (src[4] >> 1);
	
	dest[3] = (src[4] << 7);
	dest[3] |= (src[5] << 2);
	dest[3] |= (src[6] >> 3);

	dest[4] = (src[6] << 5);
	dest[4] |= (src[7]);

	putchar('\n');

	for (int i = 0; i < 5; i++) printBitNumber(dest[i]);
}

void char5to8(unsigned char dest[5], unsigned char src[8])
{
	printf("\ndest 5 >> src 8 \n");

	for (int i = 0; i < 8; i++) src[i] = 0;

	src[0] = (dest[0] >> 3);
	src[1] = ((dest[0] & 0x07) << 2) | (dest[1] >> 6);
	src[2] = ((dest[1] >> 1) & 0x1F);
	src[3] = ((dest[1] & 0x01) << 4) | (dest[2] >> 4);
	src[4] = ((dest[2] & 0x0F) << 1) | (dest[3] >> 7);
	src[5] = ((dest[3] >> 2) & 0x1F);
	src[6] = ((dest[3] & 0x03) << 3) | (dest[4] >> 5);
	src[7] = (dest[4] & 0x1F);

	for (int i = 0; i < 8; i++) printBitNumber(src[i]);
	for (int i = 0; i < 8; i++) src[i] += 'a';
}

 

 

즉, short는 비트를 2배 이동하고 유효한 비트의 범위를 2배 늘려주면 된다.

void short16to10(unsigned short src[8], unsigned short dest[5])
{
	dest[0] = src[0] << 6;
	dest[0] |= ((src[1] >> 4));
	
	dest[1] = (src[1] << 12);
	dest[1] |= (src[2] << 2);
	dest[1] |= (src[3] >> 8);

	dest[2] = (src[3] << 8);
	dest[2] |= (src[4] >> 2);
	
	dest[3] = (src[4] << 14);
	dest[3] |= (src[5] << 4);
	dest[3] |= (src[6] >> 6);

	dest[4] = (src[6] << 10);
	dest[4] |= (src[7]);
}

void short10to16(unsigned short dest[5], unsigned short src[8])
{
	for (int i = 0; i < 8; i++) src[i] = 0;

	src[0] = (dest[0] >> 6);
	src[1] = ((dest[0] & 0x003F) << 4) | (dest[1] >> 12);
	src[2] = ((dest[1] >> 2) & 0x03FF);
	src[3] = ((dest[1] & 0x0003) << 8) | (dest[2] >> 8);
	src[4] = ((dest[2] & 0x00FF) << 2) | (dest[3] >> 14);
	src[5] = ((dest[3] >> 4) & 0x03FF);
	src[6] = ((dest[3] & 0x000F) << 6) | (dest[4] >> 10);
	src[7] = (dest[4] & 0x03FF);
}

 

전체 코드는 다음과 같다.

#include <stdio.h>

unsigned short original[16000];
unsigned short temp[10000];

template <typename T>
void printBitNumber(T number)
{
	unsigned int bitSize = sizeof(number) * 8;
	T mask = (1ull) << (bitSize - 1);

	printf("%d", (number & mask) == mask);

	mask = (1ull) << (bitSize - 2);
	for (int i = 1; i < bitSize; i++)
	{
		printf("%d", (number & mask) == mask);
		mask >>= 1;
		if (i % 8 == 7) printf(" ");
	}
	putchar('\n');
}

int compare(unsigned short *a, unsigned short *b, int size)
{
	for (int i = 0; i < size; i++)
		if (a[i] != b[i]) return a[i] - b[i];
		
	return 0;
}

void short16to10(unsigned short src[8], unsigned short dest[5])
{
	dest[0] = src[0] << 6;
	dest[0] |= ((src[1] >> 4));
	
	dest[1] = (src[1] << 12);
	dest[1] |= (src[2] << 2);
	dest[1] |= (src[3] >> 8);

	dest[2] = (src[3] << 8);
	dest[2] |= (src[4] >> 2);
	
	dest[3] = (src[4] << 14);
	dest[3] |= (src[5] << 4);
	dest[3] |= (src[6] >> 6);

	dest[4] = (src[6] << 10);
	dest[4] |= (src[7]);
}

void short10to16(unsigned short dest[5], unsigned short src[8])
{
	for (int i = 0; i < 8; i++) src[i] = 0;

	src[0] = (dest[0] >> 6);
	src[1] = ((dest[0] & 0x003F) << 4) | (dest[1] >> 12);
	src[2] = ((dest[1] >> 2) & 0x03FF);
	src[3] = ((dest[1] & 0x0003) << 8) | (dest[2] >> 8);
	src[4] = ((dest[2] & 0x00FF) << 2) | (dest[3] >> 14);
	src[5] = ((dest[3] >> 4) & 0x03FF);
	src[6] = ((dest[3] & 0x000F) << 6) | (dest[4] >> 10);
	src[7] = (dest[4] & 0x03FF);
}

void compress(unsigned short* src, unsigned short* dest)
{
	int destIndex = 0;
	for (int i = 0; i < 16000; i += 8)
	{
		short16to10(src + i, dest + destIndex);
		destIndex += 5;
	}
}

void decompress(unsigned short* dest, unsigned short* src)
{
	int srcIndex = 0;
	for (int i = 0; i < 10000; i += 5)
	{
		short10to16(dest + i, src + srcIndex);
		srcIndex += 8;
	}
}

int main()
{
	unsigned short problem[16000] = { 0 };

	for (int i = 0; i < 16000; i++) problem[i] = i % 1024;

	for (int i = 0; i < 16000; i++) original[i] = problem[i];

	compress(original, temp);

	for (int i = 0; i < 16000; i++) original[i] = 0;

	decompress(temp, original);

	if (compare(problem, original, 16000) == 0) printf("PASS!!\n");
	else printf("FAIL!!\n");

	return 0;
}

int 32 to 20

 

int에 들어가있는 32bit는 220 = 1,048,576 미만의 수로만 구성되어 있다고 볼 수 있다.

#include <stdio.h>
#include <cstdlib>

unsigned int original[16000];
unsigned int temp[10000];

template <typename T>
void printBitNumber(T number)
{
	unsigned int bitSize = sizeof(number) * 8;
	T mask = (1ull) << (bitSize - 1);

	printf("%d", (number & mask) == mask);

	mask = (1ull) << (bitSize - 2);
	for (int i = 1; i < bitSize; i++)
	{
		printf("%d", (number & mask) == mask);
		mask >>= 1;
		if (i % 8 == 7) printf(" ");
	}
	putchar('\n');
}

int compare(unsigned int *a, unsigned int *b, int size)
{
	for (int i = 0; i < size; i++)
		if (a[i] != b[i]) return a[i] - b[i];
		
	return 0;
}

void int32to20(unsigned int src[8], unsigned int dest[5])
{
	dest[0] = src[0] << 12;
	dest[0] |= ((src[1] >> 8));
	
	dest[1] = (src[1] << 24);
	dest[1] |= (src[2] << 4);
	dest[1] |= (src[3] >> 16);

	dest[2] = (src[3] << 16);
	dest[2] |= (src[4] >> 4);
	
	dest[3] = (src[4] << 28);
	dest[3] |= (src[5] << 8);
	dest[3] |= (src[6] >> 12);

	dest[4] = (src[6] << 20);
	dest[4] |= (src[7]);
}

void int20to32(unsigned int dest[5], unsigned int src[8])
{
	for (int i = 0; i < 8; i++) src[i] = 0;

	src[0] = (dest[0] >> 12);
	src[1] = ((dest[0] & 0x00000FFF) << 8) | (dest[1] >> 24);
	src[2] = ((dest[1] >> 4) & 0x000FFFFF);
	src[3] = ((dest[1] & 0x0000000F) << 16) | (dest[2] >> 16);
	src[4] = ((dest[2] & 0x0000FFFF) << 4) | (dest[3] >> 28);
	src[5] = ((dest[3] >> 8) & 0x000FFFFF);
	src[6] = ((dest[3] & 0x000000FF) << 12) | (dest[4] >> 20);
	src[7] = (dest[4] & 0x000FFFFF);
}

void compress(unsigned int* src, unsigned int* dest)
{
	int destIndex = 0;
	for (int i = 0; i < 16000; i += 8)
	{
		int32to20(src + i, dest + destIndex);
		destIndex += 5;
	}
}

void decompress(unsigned int* dest, unsigned int* src)
{
	int srcIndex = 0;
	for (int i = 0; i < 10000; i += 5)
	{
		int20to32(dest + i, src + srcIndex);
		srcIndex += 8;
	}
}

int main()
{
	unsigned int problem[16000] = { 0 };

	for (int i = 0; i < 16000; i++) problem[i] = rand() % 1048576;

	for (int i = 0; i < 16000; i++) original[i] = problem[i];

	compress(original, temp);

	for (int i = 0; i < 16000; i++) original[i] = 0;

	decompress(temp, original);

	if (compare(problem, original, 16000) == 0) printf("PASS!!\n");
	else printf("FAIL!!\n");

	return 0;
}

long 64 to 40

 

long에 들어가있는 64bit는 240 = 1,099,511,627,776 미만의 수로만 구성되어 있다고 볼 수 있다.

#include <stdio.h>
#include <cstdlib>

typedef unsigned long long int ull;

ull original[16000];
ull temp[10000];

template <typename T>
void printBitNumber(T number)
{
	unsigned int bitSize = sizeof(number) * 8;
	T mask = (1ull) << (bitSize - 1);

	printf("%d", (number & mask) == mask);

	mask = (1ull) << (bitSize - 2);
	for (int i = 1; i < bitSize; i++)
	{
		printf("%d", (number & mask) == mask);
		mask >>= 1;
		if (i % 8 == 7) printf(" ");
	}
	putchar('\n');
}

long compare(ull *a, ull *b, int size)
{
	for (int i = 0; i < size; i++)
	{
		if (a[i] != b[i]) printf("???? %d %lld %lld\n", i, a[i], b[i]);
		if (a[i] != b[i]) return a[i] - b[i];
	}
		
	return 0;
}

void long64to40(ull src[8], ull dest[5])
{
	dest[0] = (src[0] << 24);
	dest[0] |= ((src[1] >> 16));
	
	dest[1] = (src[1] << 48);
	dest[1] |= (src[2] << 8);
	dest[1] |= (src[3] >> 32);

	dest[2] = (src[3] << 32);
	dest[2] |= (src[4] >> 8);
	
	dest[3] = (src[4] << 56);
	dest[3] |= (src[5] << 16);
	dest[3] |= (src[6] >> 24);

	dest[4] = (src[6] << 40);
	dest[4] |= (src[7]);
}

void long40to64(ull dest[5], ull src[8])
{
	for (int i = 0; i < 8; i++) src[i] = 0;

	src[0] = (dest[0] >> 24);
	src[1] = ((dest[0] & 0x00000000FFFFFF) << 16) | (dest[1] >> 48);
	src[2] = ((dest[1] >> 8) & 0x000000FFFFFFFFFF);
	src[3] = ((dest[1] & 0x00000000000000FF) << 32) | (dest[2] >> 32);
	src[4] = ((dest[2] & 0x00000000FFFFFFFF) << 8) | (dest[3] >> 56);
	src[5] = ((dest[3] >> 16) & 0x000000FFFFFFFFFF);
	src[6] = ((dest[3] & 0x00000000000000FFFF) << 24) | (dest[4] >> 40);
	src[7] = (dest[4] & 0x000000FFFFFFFFFF);
}

void compress(ull src[5], ull dest[8])
{
	int destIndex = 0;
	for (int i = 0; i < 16000; i += 8)
	{
		long64to40(src + i, dest + destIndex);
		destIndex += 5;
	}
}

void decompress(ull* dest, ull* src)
{
	int srcIndex = 0;
	for (int i = 0; i < 10000; i += 5)
	{
		long40to64(dest + i, src + srcIndex);
		srcIndex += 8;
	}
}

int main()
{
	ull problem[16000] = { 0 };

	for (int i = 0; i < 16000; i++) problem[i] = 1099511627776 - rand();

	for (int i = 0; i < 16000; i++) original[i] = problem[i];

	compress(original, temp);

	for (int i = 0; i < 16000; i++) original[i] = 0;

	decompress(temp, original);

	if (compare(problem, original, 16000) == 0) printf("PASS!!\n");
	else printf("FAIL!!\n");

	return 0;
}

type별 코드 정리

void char8to5(unsigned char src[8], unsigned char dest[5])
{
	printf("src 8 >> dest 5\n");

	for (int i = 0; i < 8; i++) src[i] -= 'a';
	for (int i = 0; i < 8; i++) printBitNumber(src[i]);

	dest[0] = src[0] << 3;
	dest[0] |= ((src[1] >> 2));
	
	dest[1] = (src[1] << 6);
	dest[1] |= (src[2] << 1);
	dest[1] |= (src[3] >> 4);

	dest[2] = (src[3] << 4);
	dest[2] |= (src[4] >> 1);
	
	dest[3] = (src[4] << 7);
	dest[3] |= (src[5] << 2);
	dest[3] |= (src[6] >> 3);

	dest[4] = (src[6] << 5);
	dest[4] |= (src[7]);

	putchar('\n');

	for (int i = 0; i < 5; i++) printBitNumber(dest[i]);
}

void char5to8(unsigned char dest[5], unsigned char src[8])
{
	printf("\ndest 5 >> src 8 \n");

	for (int i = 0; i < 8; i++) src[i] = 0;

	src[0] = (dest[0] >> 3);
	src[1] = ((dest[0] & 0x07) << 2) | (dest[1] >> 6);
	src[2] = ((dest[1] >> 1) & 0x1F);
	src[3] = ((dest[1] & 0x01) << 4) | (dest[2] >> 4);
	src[4] = ((dest[2] & 0x0F) << 1) | (dest[3] >> 7);
	src[5] = ((dest[3] >> 2) & 0x1F);
	src[6] = ((dest[3] & 0x03) << 3) | (dest[4] >> 5);
	src[7] = (dest[4] & 0x1F);

	for (int i = 0; i < 8; i++) printBitNumber(src[i]);
	for (int i = 0; i < 8; i++) src[i] += 'a';
}

void short16to10(unsigned short src[8], unsigned short dest[5])
{
	dest[0] = src[0] << 6;
	dest[0] |= ((src[1] >> 4));
	
	dest[1] = (src[1] << 12);
	dest[1] |= (src[2] << 2);
	dest[1] |= (src[3] >> 8);

	dest[2] = (src[3] << 8);
	dest[2] |= (src[4] >> 2);
	
	dest[3] = (src[4] << 14);
	dest[3] |= (src[5] << 4);
	dest[3] |= (src[6] >> 6);

	dest[4] = (src[6] << 10);
	dest[4] |= (src[7]);
}

void short10to16(unsigned short dest[5], unsigned short src[8])
{
	for (int i = 0; i < 8; i++) src[i] = 0;

	src[0] = (dest[0] >> 6);
	src[1] = ((dest[0] & 0x003F) << 4) | (dest[1] >> 12);
	src[2] = ((dest[1] >> 2) & 0x03FF);
	src[3] = ((dest[1] & 0x0003) << 8) | (dest[2] >> 8);
	src[4] = ((dest[2] & 0x00FF) << 2) | (dest[3] >> 14);
	src[5] = ((dest[3] >> 4) & 0x03FF);
	src[6] = ((dest[3] & 0x000F) << 6) | (dest[4] >> 10);
	src[7] = (dest[4] & 0x03FF);
}

void int32to20(unsigned int src[8], unsigned int dest[5])
{
	dest[0] = src[0] << 12;
	dest[0] |= ((src[1] >> 8));
	
	dest[1] = (src[1] << 24);
	dest[1] |= (src[2] << 4);
	dest[1] |= (src[3] >> 16);

	dest[2] = (src[3] << 16);
	dest[2] |= (src[4] >> 4);
	
	dest[3] = (src[4] << 28);
	dest[3] |= (src[5] << 8);
	dest[3] |= (src[6] >> 12);

	dest[4] = (src[6] << 20);
	dest[4] |= (src[7]);
}

void int20to32(unsigned int dest[5], unsigned int src[8])
{
	for (int i = 0; i < 8; i++) src[i] = 0;

	src[0] = (dest[0] >> 12);
	src[1] = ((dest[0] & 0x00000FFF) << 8) | (dest[1] >> 24);
	src[2] = ((dest[1] >> 4) & 0x000FFFFF);
	src[3] = ((dest[1] & 0x0000000F) << 16) | (dest[2] >> 16);
	src[4] = ((dest[2] & 0x0000FFFF) << 4) | (dest[3] >> 28);
	src[5] = ((dest[3] >> 8) & 0x000FFFFF);
	src[6] = ((dest[3] & 0x000000FF) << 12) | (dest[4] >> 20);
	src[7] = (dest[4] & 0x000FFFFF);
}

void long64to40(ull src[8], ull dest[5])
{
	dest[0] = (src[0] << 24);
	dest[0] |= ((src[1] >> 16));
	
	dest[1] = (src[1] << 48);
	dest[1] |= (src[2] << 8);
	dest[1] |= (src[3] >> 32);

	dest[2] = (src[3] << 32);
	dest[2] |= (src[4] >> 8);
	
	dest[3] = (src[4] << 56);
	dest[3] |= (src[5] << 16);
	dest[3] |= (src[6] >> 24);

	dest[4] = (src[6] << 40);
	dest[4] |= (src[7]);
}

void long40to64(ull dest[5], ull src[8])
{
	for (int i = 0; i < 8; i++) src[i] = 0;

	src[0] = (dest[0] >> 24);
	src[1] = ((dest[0] & 0x00000000FFFFFF) << 16) | (dest[1] >> 48);
	src[2] = ((dest[1] >> 8) & 0x000000FFFFFFFFFF);
	src[3] = ((dest[1] & 0x00000000000000FF) << 32) | (dest[2] >> 32);
	src[4] = ((dest[2] & 0x00000000FFFFFFFF) << 8) | (dest[3] >> 56);
	src[5] = ((dest[3] >> 16) & 0x000000FFFFFFFFFF);
	src[6] = ((dest[3] & 0x00000000000000FFFF) << 24) | (dest[4] >> 40);
	src[7] = (dest[4] & 0x000000FFFFFFFFFF);
}
반응형

댓글