diff src/matsu/matsu.go @ 32:efa8836fd428

add encoding japanese & rune sample.
author pyon@macmini
date Fri, 17 May 2019 19:50:32 +0900
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/matsu/matsu.go	Fri May 17 19:50:32 2019 +0900
@@ -0,0 +1,177 @@
+/*
+  Matsu2
+  	訪問調査依頼一覧出力プログラム
+	original written by GNU-awk
+
+  Last Change: 2019-05-17 金 15:39:21.
+*/
+package main
+
+/*
+#cgo LDFLAGS: -L. -lxdwapi -static
+#include <stdio.h>
+#include <string.h>
+
+#include <windows.h>
+#include <xdw_api.h>
+
+// xdw_x2t opens the DocuWorks document `file` read-only, extracts the text of
+// every page with XDW_GetPageTextToMemory and returns it as a single
+// NUL-terminated string in which consecutive pages are separated by "\n".
+//
+// The result is allocated with malloc and is owned by the caller.
+// NULL is returned when the path cannot be resolved, the document cannot be
+// opened, or memory cannot be allocated.
+//
+// NOTE: this function lives inside a Go comment (the cgo preamble), so only
+// line comments may be used here.
+char* xdw_x2t(const char* file) {
+	char in_path[_MAX_PATH];
+	if (_fullpath(in_path, file, _MAX_PATH) == NULL) {
+		// Without this check in_path would be used uninitialised.
+		fprintf(stderr, "Error: cannot open %s\n", file);
+		return NULL;
+	}
+
+	// Open the document handle.
+	XDW_DOCUMENT_HANDLE h = NULL;
+	XDW_OPEN_MODE_EX mode = {sizeof(XDW_OPEN_MODE_EX), XDW_OPEN_READONLY, XDW_AUTH_NODIALOGUE};
+	if (XDW_OpenDocumentHandle(in_path, &h, (XDW_OPEN_MODE*)&mode)) {
+		// Diagnostics go to stderr: stdout may be carrying the program's output.
+		fprintf(stderr, "Error: cannot open %s\n", file);
+		return NULL;
+	}
+
+	// Get the total number of pages (stays 0 if the query fails).
+	XDW_DOCUMENT_INFO info = {sizeof(XDW_DOCUMENT_INFO), 0, 0, 0};
+	XDW_GetDocumentInformation(h, &info);
+	int nPage = info.nPages;
+	if (nPage < 0) nPage = 0;
+
+	// Pass 1: ask for the buffer size each page needs. The sizes are kept in
+	// a heap array sized from the page count, so any page count is supported.
+	long *sizes = (long*)malloc(sizeof(long) * ((size_t)nPage + 1));
+	if (sizes == NULL) {
+		XDW_CloseDocumentHandle(h, NULL);
+		return NULL;
+	}
+	size_t total = 1;	// final terminating NUL
+	for (int i = 1; i <= nPage; i++) {
+		sizes[i] = XDW_GetPageTextToMemory(h, i, NULL, 0, NULL);
+		if (sizes[i] < 0) sizes[i] = 0;	// treat a failed query as an empty page
+		total += (size_t)sizes[i] + 1;	// page text + one byte for "\n" / guard NUL
+	}
+
+	char *all_text = (char*)malloc(total);
+	if (all_text == NULL) {
+		free(sizes);
+		XDW_CloseDocumentHandle(h, NULL);
+		return NULL;
+	}
+
+	// Pass 2: read every page straight into its place in the result buffer,
+	// tracking the write offset instead of re-scanning with strcat.
+	size_t used = 0;
+	all_text[0] = '\0';
+	for (int i = 1; i <= nPage; i++) {
+		char *dst = all_text + used;
+		if (sizes[i] > 0) {
+			// Zero the slot (plus one guard byte) so strlen below is bounded
+			// even if the API does not write a terminator.
+			memset(dst, 0, (size_t)sizes[i] + 1);
+			if (XDW_GetPageTextToMemory(h, i, dst, sizes[i], NULL) < 0) {
+				dst[0] = '\0';	// drop the page on read failure
+			}
+			used += strlen(dst);
+		}
+		if (i < nPage) all_text[used++] = '\n';	// page separator
+		all_text[used] = '\0';
+	}
+
+	free(sizes);
+	XDW_CloseDocumentHandle(h, NULL); // Close the document handle.
+	return all_text;
+}
+*/
+import "C"
+import (
+	"bufio"
+    "fmt"
+	"flag"
+	"log"
+	"os"
+	"regexp"
+	"strings"
+
+	"golang.org/x/text/encoding/japanese"
+	"golang.org/x/text/transform"
+)
+
+// runeSlice returns string(r[lo:hi]) with both bounds clamped to len(r), so a
+// shorter-than-expected input yields a shorter (possibly empty) string
+// instead of a slice-bounds panic.
+func runeSlice(r []rune, lo, hi int) string {
+	if hi > len(r) {
+		hi = len(r)
+	}
+	if lo > hi {
+		lo = hi
+	}
+	return string(r[lo:hi])
+}
+
+// main extracts the text of a DocuWorks file through the cgo helper
+// xdw_x2t, decodes it from Shift_JIS, and writes one CSV row per record
+// that contains an insured-person number.
+//
+// Flags:
+//
+//	-i   input file (default "KBPC116G.xdw")
+//	-o   output file, "-" for stdout (default)
+//	-w   write UTF-8 instead of Shift_JIS
+//	-v   verbose progress on stderr
+//	-vv  very verbose (implies -v)
+//	-V   print version and exit
+func main() {
+	infile := flag.String("i", "KBPC116G.xdw", "input file")
+	outfile := flag.String("o", "-", "output file")
+	utf8 := flag.Bool("w", false, "write utf8")
+	verbose := flag.Bool("v", false, "verbose")
+	vverbose := flag.Bool("vv", false, "very verbose")
+	version := flag.Bool("V", false, "print version")
+	flag.Parse()
+
+	if *version {
+		fmt.Println("matsu2 - v1.0")
+		os.Exit(0)
+	}
+	if *vverbose {
+		*verbose = true
+	}
+
+	// xdw_x2t returns NULL on failure; C.GoString turns that into "".
+	// NOTE(review): neither the C.CString argument nor the returned C buffer
+	// is freed here; the process exits shortly afterwards.
+	s := C.GoString(C.xdw_x2t(C.CString(*infile)))
+	r := strings.NewReader(s)
+	tr := transform.NewReader(r, japanese.ShiftJIS.NewDecoder())
+
+	f := os.Stdout
+	if *outfile != "-" {
+		var err error
+		f, err = os.Create(*outfile)
+		if err != nil {
+			log.Fatal(err)
+		}
+		defer f.Close()
+	}
+
+	if *verbose {
+		fmt.Fprintln(os.Stderr, "input:  "+*infile)
+		fmt.Fprintln(os.Stderr, "output: "+*outfile)
+	}
+
+	var (
+		// Ten-digit number starting with 01, 02, 03 or 08.
+		rehhsno = regexp.MustCompile(`0[1238]\d{8}`)
+		// A date written with a Japanese era name.
+		redate = regexp.MustCompile(`((明治)|(大正)|(昭和)|(平成)|(令和)).{1,2}年.\d月.\d日`)
+		// Two consecutive dates followed by three characters.
+		recity = regexp.MustCompile(`(((平成)|(令和)).{1,2}年.\d月.\d日){2}...`)
+		// From the number up to the last matching date.
+		rename = regexp.MustCompile(`0[1238]\d{8}.*((平成)|(令和)).{1,2}年.\d月.\d日`)
+		// Runs of two or more spaces.
+		rezensp = regexp.MustCompile(` {2,}`)
+	)
+
+	// Output is Shift_JIS unless -w was given.
+	w := bufio.NewWriter(f)
+	if !*utf8 {
+		tw := transform.NewWriter(f, japanese.ShiftJIS.NewEncoder())
+		w = bufio.NewWriter(tw)
+	}
+	header := []string{"申請日", "被保番", "氏名", "生年月日", "市町村", "-", "-", "-", "依頼日"}
+	fmt.Fprintln(w, strings.Join(header, ","))
+
+	var p, q int   // p: request headers seen (verbose only), q: rows written
+	var req string // date taken from the most recent "依頼書" line
+	sc := bufio.NewScanner(tr)
+	for sc.Scan() {
+		str := strings.TrimRight(sc.Text(), " ")
+		if strings.HasSuffix(str, "依頼書") {
+			// Header line: remember its date for the rows that follow.
+			req = redate.FindString(str)
+			req = strings.Replace(req, " ", "", -1)
+			if *verbose {
+				p++
+				fmt.Fprintf(os.Stderr, "req: %d\n", p)
+			}
+			continue
+		}
+
+		// Records on a line are delimited by "〒"; the text after the last
+		// delimiter is not a record and is skipped.
+		row := strings.Split(str, "〒")
+		for i := 0; i < len(row)-1; i++ {
+			var app, hhsno, name, birth, city, empty string
+
+			// First date is the birth date, second the application date.
+			// Each index is guarded separately so a record with only one
+			// date no longer panics.
+			d := redate.FindAllString(row[i], -1)
+			if len(d) > 0 {
+				birth = strings.Replace(d[0], " ", "", -1)
+			}
+			if len(d) > 1 {
+				app = strings.Replace(d[1], " ", "", -1)
+			}
+
+			if m := rehhsno.FindString(row[i]); m != "" {
+				// Emitted as ="..." (presumably so spreadsheets keep the
+				// leading zero — confirm with consumers of the CSV).
+				hhsno = "=\"" + m + "\""
+			}
+
+			if m := rename.FindString(row[i]); m != "" {
+				// Fixed-width columns counted in runes: 10-35 kana,
+				// 37-54 name. Bounds are clamped for short matches.
+				n := []rune(m)
+				kana := strings.Trim(runeSlice(n, 10, 36), " ")
+				name = strings.Trim(runeSlice(n, 37, 55), " ")
+				name = rezensp.ReplaceAllString(name, "")
+				name += "(" + kana + ")"
+			}
+
+			if m := recity.FindString(row[i]); m != "" {
+				// The three characters after the two dates are the city.
+				c := []rune(m)
+				city = string(c[len(c)-3:])
+				city = strings.Replace(city, "仙北郡", "美郷町", -1)
+			}
+
+			// Only records with a number produce a row.
+			if hhsno != "" {
+				fields := []string{app, hhsno, name, birth, city, empty, empty, empty, req}
+				fmt.Fprintln(w, strings.Join(fields, ","))
+				if *vverbose {
+					// hhsno[2:12] strips the surrounding =" and ".
+					fmt.Fprintf(os.Stderr, " %02d: %s\n", i+1, hhsno[2:12])
+				}
+				q++
+			}
+		}
+	}
+	scanErr := sc.Err()
+
+	if *verbose {
+		fmt.Fprintf(os.Stderr, "finish [%d customer]\n", q)
+	}
+
+	// Flush whatever was produced before reporting any read error, so a
+	// partial result is not lost.
+	if err := w.Flush(); err != nil {
+		log.Fatal(err)
+	}
+	if scanErr != nil {
+		// e.g. bufio.ErrTooLong when a line exceeds the scanner's token limit.
+		log.Fatal(scanErr)
+	}
+}
+