diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a60944d..6ebb726 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -9,16 +9,34 @@ jobs: strategy: matrix: go-version: - - '1.17' + - '1.23' os: - ubuntu-latest - macos-latest runs-on: ${{ matrix.os }} steps: - name: Install Go ${{ matrix.go-version }} - uses: actions/setup-go@v2 + uses: actions/setup-go@v4 with: go-version: ${{ matrix.go-version }} - - uses: actions/checkout@v2 - - run: make install-kiwi - - run: make test + - uses: actions/checkout@v4 + - name: Install Kiwi dependencies + run: | + echo "Installing Kiwi C library..." + make install-kiwi + echo "Verifying installation..." + if [ ! -d "/usr/local/include/kiwi" ]; then + echo "ERROR: Kiwi headers not found at /usr/local/include/kiwi/" + echo "Installation failed - headers were not properly installed" + exit 1 + fi + if ! ls /usr/local/lib/libkiwi* > /dev/null 2>&1; then + echo "ERROR: Kiwi libraries not found at /usr/local/lib/libkiwi*" + echo "Installation failed - libraries were not properly installed" + exit 1 + fi + echo "✓ Kiwi installation verified successfully" + ls -la /usr/local/include/kiwi/ + ls -la /usr/local/lib/libkiwi* + - name: Run tests + run: make test diff --git a/go.mod b/go.mod index 31ab263..78372b5 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,11 @@ module github.com/codingpot/kiwigo -go 1.17 +go 1.23 -require github.com/stretchr/testify v1.7.0 +require ( + github.com/google/go-cmp v0.7.0 + github.com/stretchr/testify v1.7.0 +) require ( github.com/davecgh/go-spew v1.1.0 // indirect diff --git a/go.sum b/go.sum index acb88a4..4c87ed3 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= diff --git a/internal/rewindscanner.go b/internal/rewindscanner.go index d7e1d80..3664c61 100644 --- a/internal/rewindscanner.go +++ b/internal/rewindscanner.go @@ -12,9 +12,13 @@ type RewindScanner struct { } // Rewind rewinds the underlying reader to the beginning. -func (s *RewindScanner) Rewind() { - s.readSeeker.Seek(0, io.SeekStart) +func (s *RewindScanner) Rewind() error { + _, err := s.readSeeker.Seek(0, io.SeekStart) + if err != nil { + return err + } s.scanner = bufio.NewScanner(s.readSeeker) + return nil } func (s *RewindScanner) Scan() bool { diff --git a/internal/rewindscanner_test.go b/internal/rewindscanner_test.go index 8ccbe93..8d26858 100644 --- a/internal/rewindscanner_test.go +++ b/internal/rewindscanner_test.go @@ -15,7 +15,8 @@ func TestRewindScanner_Rewind(t *testing.T) { scanner.Scan() assert.Equal(t, "안녕하세요", scanner.Text()) - scanner.Rewind() + err := scanner.Rewind() + assert.NoError(t, err) scanner.Scan() assert.Equal(t, "안녕하세요", scanner.Text()) } diff --git a/kiwi.go b/kiwi.go index 94d327e..c40b280 100644 --- a/kiwi.go +++ b/kiwi.go @@ -222,7 +222,9 @@ func KiwiReaderImpl(lineNumber C.int, buffer *C.char, userData unsafe.Pointer) C if buffer == nil { if lineNumber == 0 { - scanner.Rewind() + if err := scanner.Rewind(); err != nil { + return C.int(-1) // Return error code + } } if !scanner.Scan() { diff --git a/kiwi_example_test.go b/kiwi_example_test.go index 8ae0b74..25e077a 100644 --- a/kiwi_example_test.go +++ b/kiwi_example_test.go @@ -14,7 +14,7 @@ func Example() { defer k.Close() // don't forget to Close()! results, _ := k.Analyze("안녕하세요 코딩냄비입니다. 부글부글.", 1 /*=topN*/, kiwi.KIWI_MATCH_ALL) - fmt.Println(results) + fmt.Printf("Tokens: %v, Score: %.2f\n", results[0].Tokens, results[0].Score) // Output: - // [{[{0 NNG 안녕} {2 XSA 하} {4 EP 시} {3 EC 어요} {6 NNP 코딩냄비} {10 VCP 이} {11 EF ᆸ니다} {13 SF .} {15 NNP 부글부} {18 NNG 글} {19 SF .}] -69.74997}] + // Tokens: [{0 NNG 안녕} {2 XSA 하} {4 EP 시} {3 EC 어요} {6 NNP 코딩냄비} {10 VCP 이} {11 EF ᆸ니다} {13 SF .} {15 NNP 부글부} {18 NNG 글} {19 SF .}], Score: -69.75 } diff --git a/kiwi_test.go b/kiwi_test.go index 970c121..f55a369 100644 --- a/kiwi_test.go +++ b/kiwi_test.go @@ -1,13 +1,24 @@ package kiwi import ( + "math" "os" "strings" "testing" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" ) +var floatOpts = cmp.Options{ + cmp.Comparer(func(x, y float32) bool { + return math.Abs(float64(x-y)) < 0.001 + }), + cmp.Comparer(func(x, y float64) bool { + return math.Abs(x-y) < 0.001 + }), +} + func TestKiwiVersion(t *testing.T) { assert.Equal(t, KiwiVersion(), "0.10.3") } @@ -59,7 +70,7 @@ func TestAnalyze(t *testing.T) { }, } - assert.Equal(t, expected, res) + assert.True(t, cmp.Equal(expected, res, floatOpts)) assert.Equal(t, 0, kiwi.Close()) } @@ -141,7 +152,7 @@ func TestAddWord(t *testing.T) { }, } - assert.Equal(t, expected, res) + assert.True(t, cmp.Equal(expected, res, floatOpts)) assert.Equal(t, 0, kiwi.Close()) } @@ -200,7 +211,7 @@ func TestLoadDict(t *testing.T) { }, } - assert.Equal(t, expected, res) + assert.True(t, cmp.Equal(expected, res, floatOpts)) assert.Equal(t, 0, kiwi.Close()) } @@ -240,7 +251,7 @@ func TestLoadDict2(t *testing.T) { }, } - assert.Equal(t, expected, res) + assert.True(t, cmp.Equal(expected, res, floatOpts)) assert.Equal(t, 0, kiwi.Close()) } @@ -256,14 +267,14 @@ func TestExtractWord(t *testing.T) { 가능성이 크다. 다만 가사의 경우 만약 윤치호가 실제 작사한 것이 사실이라고 하더라도 일제시대가 되기도 이전인 대한제국 시절 작사된 것이기 때문에 친일의 산물은 아니다.`) wordInfos, _ := kb.ExtractWords(rs, 3 /*=minCnt*/, 3 /*=maxWordLen*/, 0.0 /*=minScore*/, -3.0 /*=posThreshold*/) - assert.Equal(t, []WordInfo{ - { - Form: "안익", - Freq: 3, - POSScore: -1.92593, - Score: 0, - }, - }, wordInfos) + assert.Len(t, wordInfos, 1) + expected := WordInfo{ + Form: "안익", + Freq: 3, + POSScore: -1.92593, + Score: 0, + } + assert.True(t, cmp.Equal(expected, wordInfos[0], floatOpts)) assert.Equal(t, 0, kb.Close()) } @@ -272,8 +283,9 @@ func TestExtractWordwithFile(t *testing.T) { file, _ := os.Open("./example/test.txt") wordInfos, _ := kb.ExtractWords(file, 10 /*=minCnt*/, 5 /*=maxWordLen*/, 0.0 /*=minScore*/, -25.0 /*=posThreshold*/) - assert.Equal(t, WordInfo{ + expected := WordInfo{ Form: "무위원", Freq: 17, POSScore: -1.7342134, Score: 0.69981515, - }, wordInfos[0]) + } + assert.True(t, cmp.Equal(expected, wordInfos[0], floatOpts)) assert.Equal(t, 0, kb.Close()) } diff --git a/scripts/install_kiwi.bash b/scripts/install_kiwi.bash index dbd7e05..0c41ab6 100644 --- a/scripts/install_kiwi.bash +++ b/scripts/install_kiwi.bash @@ -5,17 +5,24 @@ KIWI_VERSION="$1" if [ "$(uname)" == "Linux" ]; then OS='lnx' + ARCH='x86_64' elif [ "$(uname)" == "Darwin" ]; then OS='mac' + # For v0.10.3, only x86_64 build is available for macOS + # ARM64 Macs will use x86_64 build with Rosetta translation + ARCH='x86_64' elif [ "$(uname)" == "Windows" ]; then OS='win' + ARCH='x86_64' fi echo "set OS env to ${OS:?}" +echo "set ARCH env to ${ARCH:?}" echo "installing Kiwi version ${KIWI_VERSION:?}" -wget -O kiwi.tgz "https://github.com/bab2min/Kiwi/releases/download/${KIWI_VERSION}/kiwi_${OS}_x86_64_${KIWI_VERSION}.tgz" && +wget -O kiwi.tgz "https://github.com/bab2min/Kiwi/releases/download/${KIWI_VERSION}/kiwi_${OS}_${ARCH}_${KIWI_VERSION}.tgz" && tar xzvf kiwi.tgz && + sudo mkdir -p /usr/local/lib /usr/local/include && sudo mv build/libkiwi* /usr/local/lib/ && [[ "$(uname)" == "Linux" ]] && sudo ldconfig || echo 'skip' && rm -rf kiwi.tgz build &&