~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 
  3 /* P9 gzip sample code for demonstrating the P9 NX hardware interface.
  4  * Not intended for production use or for performance or compression
  5  * ratio measurements.  For simplicity of demonstration, this sample
  6  * code compresses into fixed Huffman blocks only (Deflate btype=1)
  7  * and has very simple memory management.  Dynamic Huffman blocks
  8  * (Deflate btype=2) are more involved as detailed in the user guide.
  9  * Note also that /dev/crypto/gzip, VAS and skiboot support are
 10  * required.
 11  *
 12  * Copyright 2020 IBM Corp.
 13  *
 14  * https://github.com/libnxz/power-gzip for zlib api and other utils
 15  *
 16  * Author: Bulent Abali <abali@us.ibm.com>
 17  *
 18  * Definitions of acronyms used here. See
 19  * P9 NX Gzip Accelerator User's Manual for details:
 20  * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
 21  *
 22  * adler/crc: 32 bit checksums appended to stream tail
 23  * ce:       completion extension
 24  * cpb:      coprocessor parameter block (metadata)
 25  * crb:      coprocessor request block (command)
 26  * csb:      coprocessor status block (status)
 27  * dht:      dynamic huffman table
 28  * dde:      data descriptor element (address, length)
 29  * ddl:      list of ddes
 30  * dh/fh:    dynamic and fixed huffman types
 31  * fc:       coprocessor function code
 32  * histlen:  history/dictionary length
 33  * history:  sliding window of up to 32KB of data
 34  * lzcount:  Deflate LZ symbol counts
 35  * rembytecnt: remaining byte count
 36  * sfbt:     source final block type; last block's type during decomp
 37  * spbc:     source processed byte count
 38  * subc:     source unprocessed bit count
 39  * tebc:     target ending bit count; valid bits in the last byte
 40  * tpbc:     target processed byte count
 41  * vas:      virtual accelerator switch; the user mode interface
 42  */
 43 
 44 #define _ISOC11_SOURCE  // For aligned_alloc()
 45 #define _DEFAULT_SOURCE // For endian.h
 46 
 47 #include <stdio.h>
 48 #include <stdlib.h>
 49 #include <string.h>
 50 #include <unistd.h>
 51 #include <stdint.h>
 52 #include <sys/types.h>
 53 #include <sys/stat.h>
 54 #include <sys/time.h>
 55 #include <sys/fcntl.h>
 56 #include <sys/mman.h>
 57 #include <endian.h>
 58 #include <bits/endian.h>
 59 #include <sys/ioctl.h>
 60 #include <assert.h>
 61 #include <errno.h>
 62 #include <signal.h>
 63 #include "utils.h"
 64 #include "nxu.h"
 65 #include "nx.h"
 66 
/*
 * Set to 0 by main().
 * NOTE(review): presumably the debug level consulted by the NXPRT() macro
 * from the nx headers - confirm against nx_dbg.h.
 */
int nx_dbg;
/*
 * Set to NULL by main() and not referenced anywhere else in this file.
 * NOTE(review): presumably a log stream used by the shared nx helper
 * code - confirm.
 */
FILE *nx_gzip_log;

/* Smaller of X and Y; each argument may be evaluated more than once */
#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
/* Size in bytes of the output file name buffer in compress_file() */
#define FNAME_MAX 1024
/* Suffix appended to the input file name to form the output file name */
#define FEXT ".nx.gz"

/*
 * Path handed to read_sysfs_file() by compress_file() to obtain the
 * maximum request buffer size on PowerVM.
 */
#define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len"
 75 
 76 /*
 77  * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
 78  */
 79 static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
 80                                 uint32_t dstlen, int with_count,
 81                                 struct nx_gzip_crb_cpb_t *cmdp, void *handle)
 82 {
 83         uint32_t fc;
 84 
 85         assert(!!cmdp);
 86 
 87         put32(cmdp->crb, gzip_fc, 0);  /* clear */
 88         fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
 89                             GZIP_FC_COMPRESS_RESUME_FHT;
 90         putnn(cmdp->crb, gzip_fc, fc);
 91         putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
 92         memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
 93 
 94         /* Section 6.6 programming notes; spbc may be in two different
 95          * places depending on FC.
 96          */
 97         if (!with_count)
 98                 put32(cmdp->cpb, out_spbc_comp, 0);
 99         else
100                 put32(cmdp->cpb, out_spbc_comp_with_count, 0);
101 
102         /* Figure 6-3 6-4; CSB location */
103         put64(cmdp->crb, csb_address, 0);
104         put64(cmdp->crb, csb_address,
105               (uint64_t) &cmdp->crb.csb & csb_address_mask);
106 
107         /* Source direct dde (scatter-gather list) */
108         clear_dde(cmdp->crb.source_dde);
109         putnn(cmdp->crb.source_dde, dde_count, 0);
110         put32(cmdp->crb.source_dde, ddebc, srclen);
111         put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
112 
113         /* Target direct dde (scatter-gather list) */
114         clear_dde(cmdp->crb.target_dde);
115         putnn(cmdp->crb.target_dde, dde_count, 0);
116         put32(cmdp->crb.target_dde, ddebc, dstlen);
117         put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
118 
119         /* Submit the crb, the job descriptor, to the accelerator */
120         return nxu_submit_job(cmdp, handle);
121 }
122 
/*
 * Writes a minimal gzip member header (no file name, zero timestamp)
 * to buf and returns the number of bytes written.  buf must have room
 * for the whole header.
 * Gzip specification at https://tools.ietf.org/html/rfc1952
 */
int gzip_header_blank(char *buf)
{
	static const unsigned char blank_hdr[] = {
		0x1f,			/* ID1 */
		0x8b,			/* ID2 */
		0x08,			/* CM  */
		0x00,			/* FLG */
		0x00, 0x00, 0x00, 0x00,	/* MTIME */
		0x04,			/* XFL 4=fastest */
		0x03,			/* OS UNIX */
	};

	memcpy(buf, blank_hdr, sizeof(blank_hdr));

	return (int) sizeof(blank_hdr);
}
145 
/*
 * Z_SYNC_FLUSH as described in zlib.h: closes the bit stream with an
 * empty stored block (BTYPE=00, LEN=0x0000, NLEN=0xFFFF) so that what
 * follows starts on a byte boundary.
 *
 * buf:   points just past the last byte produced so far
 * tebc:  number of valid bits in that last byte; 0 means the stream is
 *        already byte aligned and buf[-1] is left untouched
 * final: bit 0 is written as the BFINAL bit of the stored block
 *
 * Returns the number of bytes appended at and after buf.
 */
int append_sync_flush(char *buf, int tebc, int final)
{
	const int used_bits = tebc & 0x7;
	/*
	 * BFINAL and BTYPE take three bits after the used bits; the
	 * LEN/NLEN fields start at the following byte boundary.
	 */
	const int len_field_bit = (used_bits + 3 <= 8) ? 8 : 16;
	const int nbytes = (len_field_bit + 32) / 8;
	char *out = buf;
	uint64_t bits;
	int k;

	if (tebc > 0) {
		/* Last byte is partially full; keep only its valid bits */
		out--;
		*out = *out & (unsigned char) ((1<<tebc)-1);
	} else {
		*out = 0;
	}

	bits = ((0x1ULL & final) << used_bits) | *out;
	bits |= (0xFFFF0000ULL) << len_field_bit; /* Zero length block */

	/* Emit the assembled bits, least significant byte first */
	for (k = 0; k < nbytes; k++) {
		out[k] = (unsigned char) (bits & 0xffULL);
		bits >>= 8;
	}

	return ((tebc != 0) && (tebc <= 5)) ? 4 : 5;
}
173 
/*
 * Sets or clears the BFINAL bit, i.e. bit 0 of the first byte at buf.
 * Only valid when the deflate block starts on a byte boundary.
 */
static void set_bfinal(void *buf, int bfinal)
{
	unsigned char *first = buf;

	if (!bfinal) {
		*first &= 0xfe;
		return;
	}

	*first |= 0x01;
}
187 
188 int compress_file(int argc, char **argv, void *handle)
189 {
190         char *inbuf, *outbuf, *srcbuf, *dstbuf;
191         char outname[FNAME_MAX];
192         uint32_t srclen, dstlen;
193         uint32_t flushlen, chunk;
194         size_t inlen, outlen, dsttotlen, srctotlen;
195         uint32_t crc, spbc, tpbc, tebc;
196         int lzcounts = 0;
197         int cc;
198         int num_hdr_bytes;
199         struct nx_gzip_crb_cpb_t *cmdp;
200         uint32_t pagelen = 65536;
201         int fault_tries = NX_MAX_FAULTS;
202         char buf[32];
203 
204         cmdp = (void *)(uintptr_t)
205                 aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
206                               sizeof(struct nx_gzip_crb_cpb_t));
207 
208         if (argc != 2) {
209                 fprintf(stderr, "usage: %s <fname>\n", argv[0]);
210                 exit(-1);
211         }
212         if (read_file_alloc(argv[1], &inbuf, &inlen))
213                 exit(-1);
214         fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
215 
216         /* Generous output buffer for header/trailer */
217         outlen = 2 * inlen + 1024;
218 
219         assert(NULL != (outbuf = (char *)malloc(outlen)));
220         nxu_touch_pages(outbuf, outlen, pagelen, 1);
221 
222         /*
223          * On PowerVM, the hypervisor defines the maximum request buffer
224          * size is defined and this value is available via sysfs.
225          */
226         if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) {
227                 chunk = atoi(buf);
228         } else {
229                 /* sysfs entry is not available on PowerNV */
230                 /* Compress piecemeal in smallish chunks */
231                 chunk = 1<<22;
232         }
233 
234         /* Write the gzip header to the stream */
235         num_hdr_bytes = gzip_header_blank(outbuf);
236         dstbuf    = outbuf + num_hdr_bytes;
237         outlen    = outlen - num_hdr_bytes;
238         dsttotlen = num_hdr_bytes;
239 
240         srcbuf    = inbuf;
241         srctotlen = 0;
242 
243         /* Init the CRB, the coprocessor request block */
244         memset(&cmdp->crb, 0, sizeof(cmdp->crb));
245 
246         /* Initial gzip crc32 */
247         put32(cmdp->cpb, in_crc, 0);
248 
249         while (inlen > 0) {
250 
251                 /* Submit chunk size source data per job */
252                 srclen = NX_MIN(chunk, inlen);
253                 /* Supply large target in case data expands */
254                 dstlen = NX_MIN(2*srclen, outlen);
255 
256                 /* Page faults are handled by the user code */
257 
258                 /* Fault-in pages; an improved code wouldn't touch so
259                  * many pages but would try to estimate the
260                  * compression ratio and adjust both the src and dst
261                  * touch amounts.
262                  */
263                 nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
264                                 1);
265                 nxu_touch_pages(srcbuf, srclen, pagelen, 0);
266                 nxu_touch_pages(dstbuf, dstlen, pagelen, 1);
267 
268                 cc = compress_fht_sample(
269                         srcbuf, srclen,
270                         dstbuf, dstlen,
271                         lzcounts, cmdp, handle);
272 
273                 if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
274                     cc != ERR_NX_AT_FAULT) {
275                         fprintf(stderr, "nx error: cc= %d\n", cc);
276                         exit(-1);
277                 }
278 
279                 /* Page faults are handled by the user code */
280                 if (cc == ERR_NX_AT_FAULT) {
281                         NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
282                         NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
283                                   fault_tries,
284                                   (unsigned long long) cmdp->crb.csb.fsaddr));
285                         fault_tries--;
286                         if (fault_tries > 0) {
287                                 continue;
288                         } else {
289                                 fprintf(stderr, "error: cannot progress; ");
290                                 fprintf(stderr, "too many faults\n");
291                                 exit(-1);
292                         }
293                 }
294 
295                 fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
296 
297                 inlen     = inlen - srclen;
298                 srcbuf    = srcbuf + srclen;
299                 srctotlen = srctotlen + srclen;
300 
301                 /* Two possible locations for spbc depending on the function
302                  * code.
303                  */
304                 spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
305                         get32(cmdp->cpb, out_spbc_comp_with_count);
306                 assert(spbc == srclen);
307 
308                 /* Target byte count */
309                 tpbc = get32(cmdp->crb.csb, tpbc);
310                 /* Target ending bit count */
311                 tebc = getnn(cmdp->cpb, out_tebc);
312                 NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
313                 NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
314 
315                 if (inlen > 0) { /* More chunks to go */
316                         set_bfinal(dstbuf, 0);
317                         dstbuf    = dstbuf + tpbc;
318                         dsttotlen = dsttotlen + tpbc;
319                         outlen    = outlen - tpbc;
320                         /* Round up to the next byte with a flush
321                          * block; do not set the BFINAqL bit.
322                          */
323                         flushlen  = append_sync_flush(dstbuf, tebc, 0);
324                         dsttotlen = dsttotlen + flushlen;
325                         outlen    = outlen - flushlen;
326                         dstbuf    = dstbuf + flushlen;
327                         NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
328                                         flushlen));
329                 } else {  /* Done */
330                         /* Set the BFINAL bit of the last block per Deflate
331                          * specification.
332                          */
333                         set_bfinal(dstbuf, 1);
334                         dstbuf    = dstbuf + tpbc;
335                         dsttotlen = dsttotlen + tpbc;
336                         outlen    = outlen - tpbc;
337                 }
338 
339                 /* Resuming crc32 for the next chunk */
340                 crc = get32(cmdp->cpb, out_crc);
341                 put32(cmdp->cpb, in_crc, crc);
342                 crc = be32toh(crc);
343         }
344 
345         /* Append crc32 and ISIZE to the end */
346         memcpy(dstbuf, &crc, 4);
347         memcpy(dstbuf+4, &srctotlen, 4);
348         dsttotlen = dsttotlen + 8;
349         outlen    = outlen - 8;
350 
351         assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
352         strcpy(outname, argv[1]);
353         strcat(outname, FEXT);
354         if (write_file(outname, outbuf, dsttotlen)) {
355                 fprintf(stderr, "write error: %s\n", outname);
356                 exit(-1);
357         }
358 
359         fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
360                 dsttotlen);
361         fprintf(stderr, "crc32 checksum = %08x\n", crc);
362 
363         if (inbuf != NULL)
364                 free(inbuf);
365 
366         if (outbuf != NULL)
367                 free(outbuf);
368 
369         return 0;
370 }
371 
372 int main(int argc, char **argv)
373 {
374         int rc;
375         struct sigaction act;
376         void *handle;
377 
378         nx_dbg = 0;
379         nx_gzip_log = NULL;
380         act.sa_handler = 0;
381         act.sa_sigaction = nxu_sigsegv_handler;
382         act.sa_flags = SA_SIGINFO;
383         act.sa_restorer = 0;
384         sigemptyset(&act.sa_mask);
385         sigaction(SIGSEGV, &act, NULL);
386 
387         handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
388         if (!handle) {
389                 fprintf(stderr, "Unable to init NX, errno %d\n", errno);
390                 exit(-1);
391         }
392 
393         rc = compress_file(argc, argv, handle);
394 
395         nx_function_end(handle);
396 
397         return rc;
398 }
399 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php