changeset 1:9341384c2785

added xdwsort.
author pyon@macmini
date Mon, 16 Oct 2017 20:51:59 +0900
parents c1ebc8b218f2
children f96c40f7aeae
files xdwsort.cpp
diffstat 1 files changed, 307 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xdwsort.cpp	Mon Oct 16 20:51:59 2017 +0900
@@ -0,0 +1,307 @@
+/* Makefile
+debug: xdwsort.cpp
+	#rm -rf tempXXXX
+	#cls
+	gcc -g -O0 -I. xdwsort.cpp xdwapi.lib
+	#date
+	#./a.exe target.xdw
+	#date
+
+release: xdwsort.cpp
+	gcc -I. xdwsort.cpp xdwapi.lib -static -o xdwsort.exe
+	strip xdwsort.exe
+
+clean:
+	rm -rf tempXXXX
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <glob.h>
+#include <io.h>
+#include <windows.h>
+#include <xdw_api.h>
+
+#define MAXCOL  1024	/* byte size of the line buffer and of each stored sort-list entry */
+#define MAXLINE 9999	/* number of MAXCOL-sized entry slots allocated for the sort list */
+#define BLOCKSZ   64	/* number of page files merged into one intermediate block file */
+
+void print_error( int code ) {	/* Write the numeric error code and a matching Japanese message to stderr. */
+	fprintf( stderr, "Error code : %d\n", code );
+	switch ( code ) {	/* NOTE: the messages below are written without a trailing newline */
+	case XDW_E_NOT_INSTALLED:
+		fprintf( stderr, "DocuWorksがインストールされていません。" );	/* meaning: DocuWorks is not installed. */
+		break;
+	case XDW_E_FILE_NOT_FOUND:
+		fprintf( stderr, "指定されたファイルが見つかりません。" );	/* meaning: The specified file was not found. */
+		break;
+	case XDW_E_FILE_EXISTS:
+		fprintf( stderr, "指定されたファイルはすでに存在します。" );	/* meaning: The specified file already exists. */
+		break;
+	case XDW_E_ACCESSDENIED:	/* these three codes share one message */
+	case XDW_E_INVALID_NAME:
+	case XDW_E_BAD_NETPATH:
+		fprintf( stderr, "指定されたファイルを開くことができません。" );	/* meaning: The specified file cannot be opened. */
+		break;
+	case XDW_E_BAD_FORMAT:
+		fprintf( stderr, "指定されたファイルは正しいフォーマットではありません。" );	/* meaning: The specified file is not in a valid format. */
+		break;
+	case XDW_E_INVALID_ACCESS:
+		fprintf( stderr, "指定された操作をする権利がありません。" );	/* meaning: No permission to perform the specified operation. */
+		break;
+	default:
+		fprintf( stderr, "エラーが発生しました。" );	/* meaning: An error occurred. */
+		break;
+	}
+}
+
+void print_now( char *msg ) {	/* Print the current local time, a tab, and msg as one line on stdout. */
+   time_t now = time( NULL );
+   struct tm *ts = localtime( &now );
+
+   char buf[80];
+   strftime( buf, sizeof( buf ), "%H:%M:%S %Z", ts );	/* hours:minutes:seconds followed by the time zone */
+   printf( "%s\t%s\n", buf, msg );
+}
+
+int main( int argc, char* argv[] ) {	// usage: prog [-v] infile; sort.list is opened relative to the working directory
+	/* Reorder the pages of the input document by the search strings in sort.list and write the result to out.xdw. */
+	/* Parse command-line options */
+	char prog[128];
+	strcpy( prog, argv[0] );	// NOTE(review): unbounded copy; a program path of 128 bytes or more overflows prog
+
+	int pnow = 0;	// set by -v: print timestamped progress messages
+	char c;
+	while ( --argc > 0 && ( *++argv )[0] == '-' ) {	// consume leading arguments that start with a dash
+		while ( c = *++argv[0] ) {	// step through each option letter of this argument
+			switch ( c ) {
+				case 'v':
+					pnow = 1;
+					print_now( "start." );
+					break;
+				case 'l':	/* accepted but does nothing yet (work in progress) */
+					break;
+				default:
+					printf("error: illegal option '%c'.\n", c );
+					exit( 1 );
+			}
+		}
+	}
+
+	if ( argc < 1 ) {	// no non-option argument left: print usage and quit
+		fprintf( stderr, "%s infile\n", prog );
+		fprintf( stderr, "%s -v infile\n", prog );
+		exit( 1 );
+	}
+
+	/* Load the sort list */
+	if ( pnow ) print_now( "reading sort-list." );
+
+	FILE *fp;
+	char buf[ MAXCOL ];
+	char *sl = (char*)malloc( MAXLINE * sizeof( char ) * MAXCOL );	// flat table: entry i starts at sl[ i * MAXCOL ]
+
+	if ( sl == NULL ) {
+		fprintf( stderr, "can't allocate memory\n" );
+		exit( 1 );
+	}
+
+	if ( ( fp = fopen( "sort.list", "r" ) ) == NULL ) {
+		fprintf( stderr, "%s: can't open file [sort.list]\n", argv[0] );	// NOTE(review): argv was advanced above, so argv[0] is the input file argument here, not the program name
+		exit ( 1 );
+	}
+
+	char *p;
+	int slN = 0;	// number of entries stored in sl
+	while ( fgets( buf, sizeof buf, fp ) ) {
+		if ( !strncmp( buf, "#",  1 ) ) continue;	// skip lines starting with a hash
+		if ( !strncmp( buf, "//", 2 ) ) continue;	// skip lines starting with two slashes
+		if ( !strcmp( buf, "\n" )     ) continue;	// skip empty lines
+
+		if ( ( p = strchr( buf, '\n' ) ) != NULL ) {	// strip the trailing newline
+			*p = '\0';
+		}
+		strncpy( &sl[ slN * MAXCOL ], buf, MAXCOL );
+		slN++;	// NOTE(review): slN is never checked against MAXLINE, so a longer list writes past the end of sl
+	}
+	fclose( fp );
+	/* disabled debug dump of the loaded sort list
+	for ( int j = 0; j < slN; j++ ) {
+		printf( "%d : %s\n", j, &sl[ j * MAXCOL ] );
+	}
+	exit( 0 );
+	*/
+
+	/* Main processing: decide the page order while extracting pages */
+	if ( pnow ) print_now( "analizing xdw-file." );
+
+	char in_path[ _MAX_PATH ];	// path scratch buffer, reused below for several different files
+	_fullpath( in_path, argv[0], _MAX_PATH );	// argv[0] is the input file argument at this point
+
+	int api_result = 0;
+
+	XDW_DOCUMENT_HANDLE h = NULL; // open the document handle
+	XDW_OPEN_MODE_EX mode = {
+		sizeof( XDW_OPEN_MODE_EX ), XDW_OPEN_READONLY, XDW_AUTH_NODIALOGUE
+	};
+	api_result = XDW_OpenDocumentHandle( in_path, &h, (XDW_OPEN_MODE*)&mode );
+	if ( api_result < 0 ) {
+		print_error( api_result );
+		return 0;	// NOTE(review): exits with status 0 on failure, unlike the exit( 1 ) used by the other error paths
+	}
+
+	XDW_DOCUMENT_INFO info = { sizeof( XDW_DOCUMENT_INFO ), 0, 0, 0 }; // get the total page count
+	XDW_GetDocumentInformation( h, &info );	// NOTE(review): return value is not checked
+	int last_page = info.nPages;
+
+	int *table = (int*)malloc( sizeof( int ) * last_page );	// table[p] = output sequence number of page p; NOTE(review): result is not checked for NULL
+	for ( int p = 0; p < last_page; p++ ) {
+		*( table + p ) = 9999;	// 9999 marks a page that has no sequence number yet
+	}
+	int index = 0;	// last sequence number handed out
+	XDW_FOUND_HANDLE pFoundHandle = NULL;
+	for ( int i = 0; i < slN; i++ ) {	// sort-list entries in file order
+		for ( int p = 0; p < last_page; p++ ) {
+			if ( *( table + p ) != 9999 ) continue;	// page already claimed by an earlier entry
+			api_result = XDW_FindTextInPage( h, p + 1, &sl[ i * MAXCOL ], NULL, &pFoundHandle, NULL );	// page argument is p + 1 (presumably 1-based page numbers)
+			if ( api_result < 0 ) {
+				print_error( api_result );
+				exit( 1 );
+			}
+			if ( pFoundHandle != NULL ) {	// presumably non-NULL means the text was found on this page
+				*( table + p ) = ++index;
+				//printf( "found : %s at %d\n", &sl[ i * MAXCOL ], p + 1 );
+				pFoundHandle = NULL;	// NOTE(review): the handle is discarded without a release call; confirm whether the API requires one
+			}
+		}
+	}
+	free( sl );
+
+	if ( pnow ) print_now( "extracting pages." );
+	mkdir( "tempXXXX" );	// fixed working directory name; return value is not checked
+	for ( int p = 0; p < last_page; p++ ) {
+		if ( *( table + p ) == 9999 ) {	// pages matched by no entry get the remaining numbers in page order
+			*( table + p ) = ++index;
+		}
+		//printf( "%d\n", *( table + p ) );
+		sprintf( buf, "tempXXXX/%04d.xdw", *( table + p ) );	// file name is the sequence number, not the page number
+		_fullpath( in_path, buf, _MAX_PATH );
+		api_result = XDW_GetPage( h, p + 1, in_path, NULL );
+		if ( api_result < 0 ) {
+			print_error( api_result );
+			exit( 1 );
+		}
+	}
+	free( table );
+
+	XDW_CloseDocumentHandle( h, NULL ); // close the document handle
+
+	/* Merge */
+	if ( pnow ) print_now( "merging pages." );
+
+	/* Block version */
+	char *blk_path = (char*)malloc( BLOCKSZ * sizeof( char ) * _MAX_PATH );	// BLOCKSZ path slots of _MAX_PATH bytes each
+	const char **blk_path_addr = (const char**)malloc( ( last_page / BLOCKSZ + 1 ) * sizeof( char* ) * _MAX_PATH );	// pointers into blk_path handed to the merge call; NOTE(review): neither malloc is checked
+
+	int bn = 0;	// number of block files written so far
+	// per-block processing
+	for ( int p = 0, m = 0; p < last_page; p++ ) {
+		m = p % BLOCKSZ;	// slot index inside the current block
+		if ( m == 0 && p > 0 ) {	// a full block of BLOCKSZ paths has been collected: merge it
+			sprintf( buf, "tempXXXX/b%04d.xdw", ++bn );
+			_fullpath( in_path, buf, _MAX_PATH );
+			api_result = XDW_MergeXdwFiles( blk_path_addr, BLOCKSZ, in_path, NULL );
+			if ( api_result < 0 ) {
+				print_error( api_result );
+				exit( 1 );
+			}
+		} 
+		sprintf( buf, "tempXXXX/%04d.xdw", p + 1 );
+		_fullpath( in_path, buf, _MAX_PATH );
+		strncpy( &blk_path[ m * _MAX_PATH ], buf, _MAX_PATH );	// NOTE(review): stores the relative path in buf; the full path just computed into in_path is unused
+		blk_path_addr[m] = &blk_path[ m * _MAX_PATH ];
+	}
+	if ( last_page % BLOCKSZ != 0 ) {	// merge the final partial block; NOTE(review): when last_page is an exact multiple of BLOCKSZ the last full block is merged neither here nor in the loop
+		sprintf( buf, "tempXXXX/b%04d.xdw", ++bn );
+		_fullpath( in_path, buf, _MAX_PATH );
+		api_result = XDW_MergeXdwFiles( blk_path_addr, last_page % BLOCKSZ, in_path, NULL );
+		if ( api_result < 0 ) {
+			print_error( api_result );
+			exit( 1 );
+		}
+	}
+
+	// combine the blocks
+	for ( int b = 0; b < bn; b++ ) {	// NOTE(review): blk_path has only BLOCKSZ slots, so bn above BLOCKSZ would write past it
+		sprintf( buf, "tempXXXX/b%04d.xdw", b + 1 );
+		_fullpath( in_path, buf, _MAX_PATH );
+		strncpy( &blk_path[ b * _MAX_PATH ], buf, _MAX_PATH );	// as above, the relative path in buf is what gets stored
+		blk_path_addr[b] = &blk_path[ b * _MAX_PATH ];
+	}
+	_fullpath( in_path, "tempXXXX/temp.xdw", _MAX_PATH );
+	remove( in_path );	// delete any existing file at the merge output path first
+	api_result = XDW_MergeXdwFiles( blk_path_addr, bn, in_path, NULL );
+	if ( api_result < 0 ) {
+		print_error( api_result );
+		exit( 1 );
+	}
+
+	free( blk_path );
+	free( blk_path_addr );
+
+	/* Full-page version (marked ok by the author; disabled): merges every page file in a single call
+	char *buf_path = (char*)malloc( last_page * sizeof( char ) * _MAX_PATH );
+	const char **buf_path_addr = (const char**)malloc( last_page * sizeof( char* ) * _MAX_PATH );
+	if ( buf_path == NULL || buf_path_addr == NULL ) {
+		fprintf( stderr, "can't allocate memory\n" );
+		exit( 1 );
+	}
+
+	for ( int p = 0; p < last_page; p++ ) {
+		sprintf( buf, "tempXXXX/%04d.xdw", p + 1 );
+		_fullpath( buf, buf, _MAX_PATH );
+		strncpy( &buf_path[ p * _MAX_PATH ], buf, _MAX_PATH );
+		//printf( "%d %x %s\n", p, &buf_path[ p * _MAX_PATH ], &buf_path[ p * _MAX_PATH ] );
+		buf_path_addr[p] = &buf_path[ p * _MAX_PATH ];
+	}
+
+	_fullpath( in_path, "tempXXXX/temp.xdw", _MAX_PATH );
+	remove( in_path );
+	api_result = XDW_MergeXdwFiles( buf_path_addr, last_page, in_path, NULL );
+	if ( api_result < 0 ) {
+		print_error( api_result );
+		exit( 1 );
+	}
+
+	free( buf_path );
+	free( buf_path_addr );
+	*/
+
+	/* Optimize */
+	if ( pnow ) print_now( "optimizing." );
+	char out_path[ _MAX_PATH ];
+	_fullpath( out_path, "out.xdw", _MAX_PATH );
+	remove( out_path );	// delete any existing output file first
+	api_result = XDW_OptimizeDocument( in_path, out_path, NULL );	// in_path still holds the full path of tempXXXX/temp.xdw
+	if ( api_result < 0 ) {
+		print_error( api_result );
+		exit( 1 );
+	}
+
+	/* Cleanup */
+	if ( pnow ) print_now( "cleaning." );
+	glob_t globbuf;
+    glob( "tempXXXX/*.*", 0, NULL, &globbuf );	// return value is not checked
+    for ( int i = 0; i < globbuf.gl_pathc; i++ ) {	// delete every matched file in the working directory
+		_fullpath( in_path, globbuf.gl_pathv[i], _MAX_PATH );
+		remove( in_path );
+    }
+    globfree( &globbuf );
+	rmdir( "tempXXXX" );
+
+	if ( pnow ) print_now( "done." );
+	return 0;
+}
+